# Spaces: Running
import csv  # Imported here as well because validate_tsv_file is pasted below instead of importing validator.py
import math

import gradio as gr
# Either paste the validate_tsv_file function directly below,
# or import it from a separate module (e.g. `from validator import validate_tsv_file`).
# --- Begin pasted validate_tsv_file function (remove if importing instead) ---
def validate_tsv_file(file_path):
    """
    Validate a three-column TSV file of timestamped text segments.

    Every row is expected to look like ``start<TAB>end<TAB>text``. The
    following checks are applied row by row:

    1. The row has exactly 3 columns.
    2. Both timestamps parse as finite numbers ("nan" and "inf" are rejected,
       because comparisons against them would silently pass the ordering
       rules below).
    3. The end timestamp is strictly greater than the start timestamp.
    4. The start timestamp is strictly greater than the previous row's end
       timestamp. This check is skipped for the first row and for any row
       that follows a row with a column-count or numeric error.

    Args:
        file_path (str): The path to the TSV file.

    Returns:
        str: A success message when no rule is violated, a newline-separated
        list of per-row problems otherwise, or an error message if the file
        is missing or cannot be read. Exceptions raised while reading are
        reported in the returned string rather than propagated.
    """
    errors = []
    last_end_time = None  # End time of the previous well-formed row, if any.

    try:
        # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also drops
        # a leading byte-order mark, which would otherwise stick to the first
        # start timestamp and make row 1 look non-numeric.
        with open(file_path, 'r', encoding='utf-8-sig', newline='') as tsvfile:
            reader = csv.reader(tsvfile, delimiter='\t')
            for row_num, row in enumerate(reader, 1):
                # Check for correct number of columns.
                if len(row) != 3:
                    errors.append(f"Row {row_num}: Expected 3 columns, but found {len(row)}. Content: '{' | '.join(row)}'")
                    # The chain is broken: the next row has no trustworthy
                    # predecessor to be compared against.
                    last_end_time = None
                    continue

                start_time_str, end_time_str = row[0], row[1]

                # Validate timestamps are (finite) numbers.
                try:
                    start_time = float(start_time_str)
                    end_time = float(end_time_str)
                    if not (math.isfinite(start_time) and math.isfinite(end_time)):
                        # float() accepts "nan"/"inf"; route them through the
                        # same error path as unparsable text.
                        raise ValueError("non-finite timestamp")
                except ValueError:
                    errors.append(f"Row {row_num}: Timestamps must be numeric. Found start='{start_time_str}', end='{end_time_str}'.")
                    last_end_time = None
                    continue

                # Rule 1: End timestamp > Start timestamp.
                if end_time <= start_time:
                    errors.append(f"Row {row_num}: End timestamp ({end_time}) must be greater than start timestamp ({start_time}).")

                # Rule 2: Current row's start_time > previous row's end_time
                # (strictly greater, so touching segments are rejected too).
                if last_end_time is not None and start_time <= last_end_time:
                    errors.append(f"Row {row_num}: Start timestamp ({start_time}) must be greater than the previous segment's end timestamp ({last_end_time}).")

                last_end_time = end_time
    except FileNotFoundError:
        return f"Error: File not found at '{file_path}'."
    except Exception as e:
        # Top-level boundary for the UI: report decode/IO/csv failures as
        # text instead of raising.
        return f"An unexpected error occurred: {str(e)}"

    if not errors:
        return "TSV file is valid and follows all timestamp rules!"
    return "TSV file has the following issues:\n" + "\n".join(errors)
# --- End of pasted/imported function ---
def handle_validation(uploaded_file_obj):
    """
    Run the TSV validator on the file supplied by the upload widget.

    Args:
        uploaded_file_obj: The value received from the file input. May be
            ``None`` (nothing uploaded), a plain string path, or an object
            whose ``name`` attribute holds the path of the uploaded file.

    Returns:
        str: A prompt to upload a file when the input is ``None``; otherwise
        the message returned by ``validate_tsv_file`` for that path.
    """
    if uploaded_file_obj is None:
        return "Please upload a TSV file first."

    # Accept either form of upload value: a string is already the path, while
    # a temporary-file style object exposes its path through `.name`.
    if isinstance(uploaded_file_obj, str):
        file_path = uploaded_file_obj
    else:
        file_path = uploaded_file_obj.name

    return validate_tsv_file(file_path)
# --- Gradio Interface Definition ---
# Input widget: one file upload, restricted to .tsv / .txt extensions.
_upload_input = gr.File(
    label="Upload TSV File (.tsv or .txt)",
    file_types=[".tsv", ".txt"],
)

# Output widget: multi-line text box (with a copy button) for the report.
_result_output = gr.Textbox(
    label="Validation Result",
    lines=15,
    show_copy_button=True,
)

# Text shown under the title; lists the same four checks the validator runs.
_description = (
    "Upload a TSV file with columns: `start_timestamp` (tab) `end_timestamp` (tab) `text`. "
    "This tool checks if: \n"
    "1. Each row has 3 columns.\n"
    "2. Timestamps are numeric.\n"
    "3. End timestamp > Start timestamp for each segment.\n"
    "4. Start timestamp of a segment > End timestamp of the previous segment."
)

# Wire the upload widget to handle_validation and show its string result.
iface = gr.Interface(
    fn=handle_validation,
    inputs=_upload_input,
    outputs=_result_output,
    title="TSV Timestamp Validator",
    description=_description,
    allow_flagging="never",  # Usually not needed for utility apps
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()