Spaces:
Running
Running
Now don't have to have join column in reference df. Can remove input files without error message.
8c163ee
import gradio as gr | |
import pandas as pd | |
import os | |
import re | |
def get_or_create_env_var(var_name, default_value):
    """Return an environment variable's value, seeding it when it is unset.

    Args:
        var_name: Name of the environment variable to look up.
        default_value: Value stored in the environment, and returned, when
            the variable is not currently set.

    Returns:
        The existing value if the variable is set (an empty string counts
        as set), otherwise ``default_value``.
    """
    # Already defined: hand back whatever is there, even if it is empty.
    if var_name in os.environ:
        return os.environ[var_name]

    # Not defined yet: record the default so later lookups see it too.
    os.environ[var_name] = default_value
    return default_value
# Retrieving or setting output folder: read GRADIO_OUTPUT_FOLDER from the
# environment, falling back to (and storing) 'output/' when it is not set.
env_var_name = 'GRADIO_OUTPUT_FOLDER'
default_value = 'output/'
output_folder = get_or_create_env_var(env_var_name, default_value)
# Report the resolved folder on stdout.
print(f'The value of {env_var_name} is {output_folder}')
def detect_file_type(filename):
    """Detect the file type based on its extension.

    Matching is case-insensitive, so ``DATA.CSV`` is classified the same way
    as ``data.csv``.

    Args:
        filename: Name or path of the file to classify.

    Returns:
        ``'csv'`` for names ending in ``.csv``, ``.csv.gz`` or ``.zip``,
        ``'xlsx'`` for ``.xlsx`` and ``'parquet'`` for ``.parquet``.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    lowered = filename.lower()

    # Gzipped CSVs and zip archives are reported as 'csv' as well.
    if lowered.endswith(('.csv', '.csv.gz', '.zip')):
        return 'csv'
    if lowered.endswith('.xlsx'):
        return 'xlsx'
    if lowered.endswith('.parquet'):
        return 'parquet'

    raise ValueError("Unsupported file type.")
def read_file(filename):
    """Load a file into a DataFrame using the reader for its detected type.

    Args:
        filename: Path of the file to load; its type is determined by
            ``detect_file_type``.

    Returns:
        The DataFrame produced by the matching pandas reader, or ``None`` if
        the detected type has no reader registered here.
    """
    # One reader per type label returned by detect_file_type.
    loaders = {
        'csv': lambda path: pd.read_csv(path, low_memory=False),
        'xlsx': pd.read_excel,
        'parquet': pd.read_parquet,
    }

    loader = loaders.get(detect_file_type(filename))
    if loader is None:
        return None
    return loader(filename)
def initial_data_load(in_file):
    """Load the uploaded data file, and a results file if one was supplied.

    Uploaded files whose path contains ``results_on_orig`` (case-insensitive)
    are treated as results files; every other file is treated as a data file.
    Only the first file of each kind is read.

    Args:
        in_file: Uploaded file objects, each exposing its path as ``.name``.
            May be empty or ``None``.

    Returns:
        A 5-tuple of: a status message, two ``gr.Dropdown`` components whose
        choices are the data file's column names, the data DataFrame and the
        results DataFrame. The DataFrames are empty when nothing was loaded.
    """
    data_df = pd.DataFrame()
    results_df = pd.DataFrame()

    if not in_file:
        return "No files provided.", gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), data_df, results_df

    # Split the uploaded paths into results files and ordinary data files.
    data_paths = []
    results_paths = []
    for uploaded in in_file:
        path = uploaded.name
        if "results_on_orig" in path.lower():
            results_paths.append(path)
        else:
            data_paths.append(path)

    if not data_paths:
        return "No data file found.", gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), data_df, results_df

    data_df = read_file(data_paths[0])

    # The results file is optional; without one results_df stays empty.
    if results_paths:
        results_df = read_file(results_paths[0])

    column_choices = list(data_df.columns)

    return (
        "Data successfully loaded",
        gr.Dropdown(choices=column_choices),
        gr.Dropdown(choices=column_choices),
        data_df,
        results_df,
    )
def ensure_output_folder_exists(output_folder):
    """Checks if the output folder exists, creates it if not.

    Prints a message saying whether the folder was created or was already
    present.

    Args:
        output_folder: Path of the folder to check or create. Missing parent
            folders are created as well.
    """
    if os.path.exists(output_folder):
        print("The output folder already exists:", output_folder)
        return

    # exist_ok=True: the folder may be created by someone else between the
    # check above and this call; that should not be treated as an error.
    os.makedirs(output_folder, exist_ok=True)
    print("Created the output folder:", output_folder)
def dummy_function(in_colnames):
    """Ignore the argument and return ``None``.

    Placeholder callback that exists just so that dropdown updates work
    correctly.

    Args:
        in_colnames: Accepted but never used.
    """
    return
# Once a process has been run, the feedback widgets are made visible
def reveal_feedback_buttons():
    """Return the four feedback components with ``visible=True``.

    Returns:
        A tuple of ``gr.Radio``, ``gr.Textbox``, ``gr.Button`` and
        ``gr.Markdown``, in that order, each constructed as visible.
    """
    feedback_radio = gr.Radio(visible=True)
    feedback_textbox = gr.Textbox(visible=True)
    feedback_button = gr.Button(visible=True)
    feedback_markdown = gr.Markdown(visible=True)
    return feedback_radio, feedback_textbox, feedback_button, feedback_markdown
def clear_inputs(in_file, in_ref, in_text):
    """Return emptied replacements for the two file inputs and the text input.

    The current values are accepted but not inspected.

    Args:
        in_file: Current value of the first file input (unused).
        in_ref: Current value of the second file input (unused).
        in_text: Current value of the text input (unused).

    Returns:
        A tuple of two ``gr.File`` components with an empty file list and a
        ``gr.Textbox`` with an empty string value.
    """
    cleared_file = gr.File(value=[])
    cleared_ref = gr.File(value=[])
    cleared_text = gr.Textbox(value='')
    return cleared_file, cleared_ref, cleared_text
## Get final processing time for logs:
def sum_numbers_before_seconds(string):
    """Extracts numbers that precede the word 'seconds' from a string and adds them up.

    Both integers ("5 seconds") and decimals ("2.5 seconds") are counted.
    Occurrences of 'seconds' with no number directly before them are ignored.

    Args:
        string: The input string.

    Returns:
        The sum of all numbers before 'seconds' in the string, rounded to one
        decimal place (0 when there are none).
    """
    # The number group is mandatory: an optional group would make findall
    # yield '' for every bare "seconds", which cannot be converted to float.
    matches = re.findall(r'(\d+(?:\.\d+)?)\s*seconds', string)

    return round(sum(float(match) for match in matches), 1)
async def get_connection_params(request: gr.Request):
    """Work out the session identifier and output folder for a request.

    If both the CUSTOM_CLOUDFRONT_HEADER and CUSTOM_CLOUDFRONT_HEADER_VALUE
    environment variables are non-empty and the request carries that header,
    its value must equal the expected value.

    The identifier is taken from, in order of preference:
    1. the username on the request (folder prefix ``user-files/``),
    2. the ``x-cognito-id`` request header (folder prefix ``user-files/``),
    3. the session hash (folder prefix ``temp-files/``).

    Args:
        request: The Gradio request for the current session; may be falsy.

    Returns:
        A 3-tuple ``(out_session_hash, output_folder, out_session_hash)``
        where ``output_folder`` is ``<prefix><identifier>/``. Three empty
        strings are returned when no request is available.

    Raises:
        ValueError: If the configured custom header is present on the request
            but its value does not match the expected value.
    """
    if not request:
        print("No session parameters found.")
        # Same arity as the success path so callers can always unpack three values.
        return "", "", ""

    print("Session hash:", request.session_hash)

    # Retrieving or setting CUSTOM_CLOUDFRONT_HEADER and its expected value
    cloudfront_header = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER', '')
    cloudfront_header_value = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER_VALUE', '')

    if cloudfront_header and cloudfront_header_value:
        # NOTE(review): a request that omits the header entirely passes this
        # check; confirm whether such requests should be rejected as well.
        if cloudfront_header in request.headers:
            supplied_cloudfront_custom_value = request.headers[cloudfront_header]

            if supplied_cloudfront_custom_value != cloudfront_header_value:
                raise ValueError("Custom Cloudfront header value does not match expected value.")

            print("Custom Cloudfront header found:", supplied_cloudfront_custom_value)

    # Get output save folder from 1 - username passed in from direct Cognito
    # login, 2 - Cognito ID header passed through a Lambda authenticator,
    # 3 - the session hash.
    if request.username:
        out_session_hash = request.username
        base_folder = "user-files/"
        print("Request username found:", out_session_hash)
    elif 'x-cognito-id' in request.headers:
        out_session_hash = request.headers['x-cognito-id']
        base_folder = "user-files/"
        print("Cognito ID found:", out_session_hash)
    else:
        out_session_hash = request.session_hash
        base_folder = "temp-files/"

    output_folder = base_folder + out_session_hash + "/"

    return out_session_hash, output_folder, out_session_hash