Spaces:
Running
Running
Olivier CARON
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import os
|
2 |
import csv
|
3 |
import streamlit as st
|
4 |
import polars as pl
|
@@ -16,19 +16,18 @@ def load_data(file):
|
|
16 |
if file_ext.lower() in ['.xls', '.xlsx']:
|
17 |
return pl.read_excel(file)
|
18 |
elif file_ext.lower() == '.csv':
|
19 |
-
file.seek(0) #
|
20 |
try:
|
21 |
-
sample = file.read(4096).decode('utf-8') #
|
22 |
encoding = 'utf-8'
|
23 |
except UnicodeDecodeError:
|
24 |
-
encoding = 'latin1' #
|
25 |
file.seek(0)
|
26 |
sample = file.read(4096).decode(encoding)
|
27 |
|
28 |
file.seek(0)
|
29 |
-
dialect = csv.Sniffer().sniff(sample) #
|
30 |
|
31 |
-
# Convertir le fichier en StringIO pour simuler un fichier texte, si nécessaire
|
32 |
file.seek(0)
|
33 |
if encoding != 'utf-8':
|
34 |
file_content = file.read().decode(encoding)
|
@@ -49,10 +48,10 @@ def perform_ner(filtered_df, selected_column, labels_list):
|
|
49 |
progress_bar = st.progress(0)
|
50 |
progress_text = st.empty()
|
51 |
|
52 |
-
start_time = time.time() #
|
53 |
|
54 |
for index, row in enumerate(filtered_df.to_pandas().itertuples(), 1):
|
55 |
-
iteration_start_time = time.time() #
|
56 |
|
57 |
if st.session_state.stop_processing:
|
58 |
progress_text.text("Process stopped by the user.")
|
@@ -69,13 +68,13 @@ def perform_ner(filtered_df, selected_column, labels_list):
|
|
69 |
progress = index / filtered_df.height
|
70 |
progress_bar.progress(progress)
|
71 |
|
72 |
-
iteration_time = time.time() - iteration_start_time #
|
73 |
-
total_time = time.time() - start_time #
|
74 |
|
75 |
progress_text.text(f"Progress: {index}/{filtered_df.height} - {progress * 100:.0f}% (Iteration: {iteration_time:.2f}s, Total: {total_time:.2f}s)")
|
76 |
|
77 |
-
end_time = time.time() #
|
78 |
-
total_execution_time = end_time - start_time #
|
79 |
|
80 |
progress_text.text(f"Processing complete! Total execution time: {total_execution_time:.2f}s")
|
81 |
|
|
|
1 |
+
import os
|
2 |
import csv
|
3 |
import streamlit as st
|
4 |
import polars as pl
|
|
|
16 |
if file_ext.lower() in ['.xls', '.xlsx']:
|
17 |
return pl.read_excel(file)
|
18 |
elif file_ext.lower() == '.csv':
|
19 |
+
file.seek(0) # Go back to the beginning of the file
|
20 |
try:
|
21 |
+
sample = file.read(4096).decode('utf-8') # Try to decode the sample in UTF-8
|
22 |
encoding = 'utf-8'
|
23 |
except UnicodeDecodeError:
|
24 |
+
encoding = 'latin1' # Switch to 'latin1' if UTF-8 fails
|
25 |
file.seek(0)
|
26 |
sample = file.read(4096).decode(encoding)
|
27 |
|
28 |
file.seek(0)
|
29 |
+
dialect = csv.Sniffer().sniff(sample) # Detect the delimiter
|
30 |
|
|
|
31 |
file.seek(0)
|
32 |
if encoding != 'utf-8':
|
33 |
file_content = file.read().decode(encoding)
|
|
|
48 |
progress_bar = st.progress(0)
|
49 |
progress_text = st.empty()
|
50 |
|
51 |
+
start_time = time.time() # Record start time for total runtime
|
52 |
|
53 |
for index, row in enumerate(filtered_df.to_pandas().itertuples(), 1):
|
54 |
+
iteration_start_time = time.time() # Start time for this iteration
|
55 |
|
56 |
if st.session_state.stop_processing:
|
57 |
progress_text.text("Process stopped by the user.")
|
|
|
68 |
progress = index / filtered_df.height
|
69 |
progress_bar.progress(progress)
|
70 |
|
71 |
+
iteration_time = time.time() - iteration_start_time # Calculate runtime for this iteration
|
72 |
+
total_time = time.time() - start_time # Calculate total elapsed time so far
|
73 |
|
74 |
progress_text.text(f"Progress: {index}/{filtered_df.height} - {progress * 100:.0f}% (Iteration: {iteration_time:.2f}s, Total: {total_time:.2f}s)")
|
75 |
|
76 |
+
end_time = time.time() # Record end time
|
77 |
+
total_execution_time = end_time - start_time # Calculate total runtime
|
78 |
|
79 |
progress_text.text(f"Processing complete! Total execution time: {total_execution_time:.2f}s")
|
80 |
|