deepsync committed on
Commit
f69fcbb
·
verified ·
1 Parent(s): 70fdbb7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import csv # Import csv here as well if not importing the whole validator.py
3
+
4
+ # You can either paste the validate_tsv_file function directly here
5
+ # OR import it if it's in a separate file (e.g., from validator import validate_tsv_file)
6
+
7
+ # --- Paste the validate_tsv_file function here if not importing ---
8
def validate_tsv_file(file_path):
    """
    Validate a three-column TSV file of timestamped text segments.

    Every row is expected to be ``start<TAB>end<TAB>text``. The following
    checks are applied row by row, and every violation is collected rather
    than stopping at the first one:

    1. The row has exactly 3 tab-separated columns.
    2. Both timestamps parse as finite numbers (``nan``/``inf`` are rejected).
    3. The end timestamp is strictly greater than the start timestamp.
    4. The start timestamp is strictly greater than the previous row's end
       timestamp. This check is skipped for the first row and for a row that
       follows one whose column count or timestamps were invalid.

    Args:
        file_path (str): The path to the TSV file.

    Returns:
        str: A success message if no issues were found, a newline-separated
        list of issues otherwise, or an error message if the file could not
        be opened or read.
    """
    # Local import keeps this function self-contained when pasted elsewhere.
    import math

    errors = []
    last_end_time = None  # End time of the previous well-formed row, if any.

    try:
        # 'utf-8-sig' decodes plain UTF-8 identically but drops a leading BOM,
        # which would otherwise stick to the first field and make the first
        # row's start timestamp look non-numeric.
        with open(file_path, 'r', encoding='utf-8-sig', newline='') as tsvfile:
            reader = csv.reader(tsvfile, delimiter='\t')
            for row_num, row in enumerate(reader, 1):
                # Check for correct number of columns.
                if len(row) != 3:
                    errors.append(
                        f"Row {row_num}: Expected 3 columns, but found {len(row)}. "
                        f"Content: '{' | '.join(row)}'"
                    )
                    # No trustworthy end time to compare the next row against.
                    last_end_time = None
                    continue

                start_time_str, end_time_str, _text_content = row

                # Validate timestamps are numbers.
                try:
                    start_time = float(start_time_str)
                    end_time = float(end_time_str)
                except ValueError:
                    errors.append(
                        f"Row {row_num}: Timestamps must be numeric. "
                        f"Found start='{start_time_str}', end='{end_time_str}'."
                    )
                    last_end_time = None
                    continue

                # float() also accepts 'nan' and 'inf'. NaN compares False
                # against everything, so it would slip through the ordering
                # rules below unnoticed; reject non-finite values explicitly.
                if not (math.isfinite(start_time) and math.isfinite(end_time)):
                    errors.append(
                        f"Row {row_num}: Timestamps must be finite numbers. "
                        f"Found start='{start_time_str}', end='{end_time_str}'."
                    )
                    last_end_time = None
                    continue

                # Rule 1: End timestamp > Start timestamp.
                if end_time <= start_time:
                    errors.append(
                        f"Row {row_num}: End timestamp ({end_time}) must be greater "
                        f"than start timestamp ({start_time})."
                    )

                # Rule 2: Current row's start_time > previous row's end_time
                # (strictly greater; touching segments are reported).
                if last_end_time is not None and start_time <= last_end_time:
                    errors.append(
                        f"Row {row_num}: Start timestamp ({start_time}) must be greater "
                        f"than the previous segment's end timestamp ({last_end_time})."
                    )

                last_end_time = end_time

    except FileNotFoundError:
        return f"Error: File not found at '{file_path}'."
    except Exception as e:
        # Boundary handler: the result is shown to the user as text, so any
        # other failure (decoding, permissions, CSV parsing) is reported
        # as a message instead of propagating.
        return f"An unexpected error occurred: {str(e)}"

    if not errors:
        return "TSV file is valid and follows all timestamp rules!"
    return "TSV file has the following issues:\n" + "\n".join(errors)
62
+ # --- End of pasted/imported function ---
63
+
64
+
65
def handle_validation(uploaded_file_obj):
    """
    Gradio callback: validate the uploaded TSV file and return the report.

    Args:
        uploaded_file_obj: The value delivered by the file input. May be
            ``None`` (nothing uploaded), a filesystem path given as a string,
            or an object exposing the path through a ``name`` attribute.

    Returns:
        str: A prompt to upload a file when nothing was provided, otherwise
        the message produced by ``validate_tsv_file``.
    """
    if uploaded_file_obj is None:
        return "Please upload a TSV file first."

    # Accept a plain path string as well as a temp-file-like object, so the
    # callback does not fail with AttributeError when handed a bare path.
    if isinstance(uploaded_file_obj, str):
        file_path = uploaded_file_obj
    else:
        file_path = uploaded_file_obj.name

    return validate_tsv_file(file_path)
73
+
74
+ # --- Gradio Interface Definition ---
75
# Input widget: a single file picker restricted to .tsv uploads.
_tsv_input = gr.File(label="Upload TSV File (.tsv)", file_types=[".tsv"])

# Output widget: a multi-line text box with a copy button for the report.
_result_box = gr.Textbox(label="Validation Result", lines=15, show_copy_button=True)

# Text shown under the title; lists the checks performed by the validator.
_description = (
    "Upload a TSV file with columns: `start_timestamp` (tab) `end_timestamp` (tab) `text`. "
    "This tool checks if: \n"
    "1. Each row has 3 columns.\n"
    "2. Timestamps are numeric.\n"
    "3. End timestamp > Start timestamp for each segment.\n"
    "4. Start timestamp of a segment > End timestamp of the previous segment."
)

# Wire the upload widget to the validation callback.
iface = gr.Interface(
    fn=handle_validation,
    inputs=_tsv_input,
    outputs=_result_box,
    title="TSV Timestamp Validator",
    description=_description,
    allow_flagging="never",  # Usually not needed for utility apps
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()