Spaces:
Sleeping
Sleeping
File size: 8,299 Bytes
6829fd5 2e05b67 6829fd5 2e05b67 6829fd5 f8734ec 6829fd5 3f5be66 6829fd5 295a965 6829fd5 295a965 6829fd5 295a965 6829fd5 295a965 6829fd5 a295957 6829fd5 82c8dd7 6829fd5 3f5be66 82c8dd7 a295957 6829fd5 295a965 6829fd5 82c8dd7 6829fd5 295a965 6829fd5 3f5be66 6829fd5 2e05b67 6829fd5 c8a9cbc 2e05b67 3f5be66 6829fd5 2e05b67 6829fd5 2e05b67 3f5be66 6829fd5 2e05b67 6829fd5 3f5be66 6829fd5 2e05b67 6829fd5 3f5be66 6829fd5 a295957 3f5be66 6829fd5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
import torch
# Start-up diagnostic: report which compute device torch can see.
# Every failure here is non-fatal; the messages go to stdout and the script
# carries on.
try:
    cuda_ready = torch.cuda.is_available()
    print(f"Is CUDA available: {cuda_ready}")
    if not cuda_ready:
        print("No CUDA device available - using CPU")
    else:
        # Looking up the device name can fail independently of the
        # availability check, so it gets its own handler.
        try:
            active_index = torch.cuda.current_device()
            print(f"CUDA device: {torch.cuda.get_device_name(active_index)}")
        except Exception as name_err:
            print(f"Error getting CUDA device name: {name_err!s}")
except Exception as probe_err:
    print(f"Error checking CUDA availability: {probe_err!s}")
    print("Continuing with CPU...")
# Logging is configured here, before streamlit and the project modules below
# are imported (presumably so that anything they log at import time goes
# through the configured handlers - confirm against modules.logging_config).
from modules.logging_config import setup_logging
setup_logging()
import logging
# Module-level logger used by main() for progress and error reporting.
logger = logging.getLogger(__name__)
import streamlit as st
import os
from huggingface_hub import login
from datetime import datetime
from modules.auth import validate_login
from modules.utils import create_excel, clean_text, extract_predicted_labels, predict_category, process_data
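# Local development only: uncomment the two lines below to load environment
# variables (e.g. HF_TOKEN, which main() reads from os.environ) from a .env file.
# from dotenv import load_dotenv
# load_dotenv()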
# Main app logic
# Sensitivity label -> numeric level passed to process_data (used there for
# the review / reject decision).
_SENSITIVITY_LEVELS = {
    "Low": 4,
    "Medium": 5,
    "High": 6,
}

# Session-state keys owned by the upload/analysis workflow and their defaults.
_ANALYSIS_STATE_DEFAULTS = {
    'show_button': True,       # whether "Start Analysis" may be offered
    'processing': False,       # True from the button click until reset
    'data_processed': False,   # True once process_data has succeeded
    'df': None,                # result of process_data
    'processed_file': None,    # identity of the upload that produced 'df'
    'analysis_error': None,    # message to display once on the next run
}

# NOTE: the multi-line texts below are kept flush-left on purpose; any leading
# whitespace inside the literal would become part of the rendered text.
_INSTRUCTIONS_MD = """
1. **Download the Excel Template file (below)**
2. **[OPTIONAL]: Select the desired filtering sensitivity level (below)**
3. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'**
4. **Upload the template file in the area to the right (or click browse files)**
5. **Click 'Start Analysis'**
The tool will start processing the uploaded application data. This can take some time
depending on the number of applications and the length of text in each. For example, a file with 1000 applications
could be expected to take approximately 5 minutes.
***NOTE (1)** - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
"""

_ABOUT_TEXT = """
This tool provides an interface for running an automated preliminary assessment of applications for a call for applications.
The tool functions by running selected text fields from the application through a series of LLMs fine-tuned for text classification (ref. diagram below).
The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against
human assessors and exhibits an extremely low false negative rate (<6%) at a Sensitivity Level of 'Low' (i.e. rejection threshold for predicted score < 4).
"""

_SENSITIVITY_HELP = (
    'Decreasing the level of sensitivity results in less '
    'applications filtered out. This also '
    'reduces the probability of false negatives (FNs). The rate of '
    'FNs at the lowest setting is approximately 6 percent, and '
    'approaches 13 percent at the highest setting. '
    'NOTE: changing this setting does not affect the raw data in the CSV output file (only the labels)'
)

_PROCESSING_FAILED_MSG = (
    "Failed to process the file. Please ensure your column names match the template file."
)


def _reset_analysis_state():
    """Put the upload/analysis workflow back into its initial state.

    Also clears the cached result and the identity of the file it came from,
    so a later run can never serve results that belong to another upload.
    A pending 'analysis_error' message is deliberately left alone.
    """
    st.session_state['show_button'] = True
    st.session_state['processing'] = False
    st.session_state['data_processed'] = False
    st.session_state['df'] = None
    st.session_state['processed_file'] = None


def _file_key(uploaded_file):
    """Return a lightweight identity (name, size) for an uploaded file."""
    return (uploaded_file.name, getattr(uploaded_file, 'size', None))


def _ensure_hf_login():
    """Log in to the Hugging Face Hub once per browser session.

    Streamlit re-executes the whole script on every interaction, so the
    login is remembered in session state instead of being repeated each run.
    If HF_TOKEN is not set, an error is shown and the script run is stopped.
    """
    if st.session_state.get('hf_logged_in'):
        return
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        logger.error("HF_TOKEN environment variable is not set")
        st.error("Server configuration error: the HF_TOKEN environment variable is not set.")
        st.stop()
    login(token=hf_token, add_to_git_credential=True)
    st.session_state['hf_logged_in'] = True


def _render_login_form():
    """Show the username/password form and authenticate the session on success."""
    username = st.text_input("Username")
    password = st.text_input("Password", type="password")
    if st.button("Login"):
        if validate_login(username, password):
            st.session_state['authenticated'] = True
            st.rerun()
        else:
            st.error("Incorrect username or password")


def _render_sidebar():
    """Render instructions, template download and sensitivity selector.

    Returns:
        The numeric sensitivity level matching the selected radio option.
    """
    with st.sidebar:
        with st.expander("ℹ️ - Instructions", expanded=False):
            st.markdown(_INSTRUCTIONS_MD)

        # Excel file download
        st.download_button(
            label="Download Excel Template",
            data=create_excel(),
            file_name="upload_template.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )

        # Sensitivity level used for review / reject (ref. process_data function)
        level_names = list(_SENSITIVITY_LEVELS)
        selected = st.sidebar.radio(
            label='Select the Sensitivity Level [OPTIONAL]',
            help=_SENSITIVITY_HELP,
            options=level_names,
            index=level_names.index("High"),
            horizontal=False,
        )
    return _SENSITIVITY_LEVELS[selected]


def _run_analysis(uploaded_file, sens_level):
    """Process the upload (once) and offer the result as a CSV download.

    On any failure the workflow state is fully reset, the user-facing message
    is stored in session state and the script is rerun, so that the
    "Start Analysis" button reappears together with the error message.
    """
    logger.info("File uploaded: %s", uploaded_file.name)
    try:
        if not st.session_state['data_processed']:
            logger.info("Starting data processing")
            st.session_state['df'] = process_data(uploaded_file, sens_level)
            st.session_state['processed_file'] = _file_key(uploaded_file)
            st.session_state['data_processed'] = True
            logger.info("Data processing completed successfully")

        csv_buffer = st.session_state['df'].to_csv(index=False).encode()
        logger.info("CSV buffer created successfully")
    except Exception:
        # logger.exception keeps the traceback; the message shown to the user
        # is stored because st.rerun() discards everything drawn in this run.
        logger.exception("Error processing file")
        _reset_analysis_state()
        st.session_state['analysis_error'] = _PROCESSING_FAILED_MSG
        st.rerun()
    else:
        current_datetime = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
        st.download_button(
            label="Download data as CSV",
            data=csv_buffer,
            file_name=f'processed_applications_{current_datetime}.csv',
            mime='text/csv',
            # After the download the workflow starts over for the next file.
            on_click=_reset_analysis_state,
        )


def main():
    """Run the Streamlit app.

    Unauthenticated sessions only see the login form. Authenticated sessions
    get the pre-filtering UI: sidebar (instructions, template download,
    sensitivity level), an "About" section, the file uploader, and the
    analyse -> download workflow driven by session-state flags.
    """
    # Sessions start unauthenticated; the login form flips this flag.
    if 'authenticated' not in st.session_state:
        st.session_state['authenticated'] = False

    if not st.session_state['authenticated']:
        _render_login_form()
        return

    _ensure_hf_login()

    # Initialize session state variables
    for key, default in _ANALYSIS_STATE_DEFAULTS.items():
        if key not in st.session_state:
            st.session_state[key] = default

    # Main Streamlit app
    st.title('Application Pre-Filtering Tool')

    # Sidebar (filters)
    sens_level = _render_sidebar()

    with st.expander("ℹ️ - About this app", expanded=False):
        st.write(_ABOUT_TEXT)
        st.image('images/pipeline.png')

    uploaded_file = st.file_uploader("Select a file containing application pre-filtering data (see instructions in the sidebar)")

    # Show (once) the error stored by a failed analysis in the previous run.
    if st.session_state['analysis_error']:
        st.error(st.session_state['analysis_error'])
        st.session_state['analysis_error'] = None

    # If the upload was removed, or replaced after results were produced,
    # the cached results no longer belong to what is on screen: start over.
    if st.session_state['processing']:
        upload_changed = uploaded_file is None or (
            st.session_state['data_processed']
            and st.session_state['processed_file'] != _file_key(uploaded_file)
        )
        if upload_changed:
            _reset_analysis_state()

    # Only show the button if show_button is True and file is uploaded and not processing
    if uploaded_file is not None and st.session_state['show_button'] and not st.session_state['processing']:
        if st.button("Start Analysis", key="start_analysis"):
            st.session_state['show_button'] = False
            st.session_state['processing'] = True
            st.rerun()

    # If we're processing, run the analysis / show the download
    if st.session_state['processing']:
        _run_analysis(uploaded_file, sens_level)
# Streamlit executes the script with __name__ set to "__main__", so the app
# still starts under `streamlit run`, while a plain import of this module
# (e.g. from a test) no longer launches the UI as a side effect.
if __name__ == "__main__":
    main()
|