Spaces:

oliviercaron
/

GLiNER_file

Running

App Files Community

Olivier CARON committed on Apr 11, 2024

Commit

f87d000

verified ·

1 Parent(s): c49547e

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -12

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os  # Add this import to use os.path.splitext
 import csv
 import streamlit as st
 import polars as pl
@@ -16,19 +16,18 @@ def load_data(file):
     if file_ext.lower() in ['.xls', '.xlsx']:
         return pl.read_excel(file)
     elif file_ext.lower() == '.csv':
-        file.seek(0)  # Retour au début du fichier
         try:
-            sample = file.read(4096).decode('utf-8')  # Essayer de décoder l'échantillon en UTF-8
             encoding = 'utf-8'
         except UnicodeDecodeError:
-            encoding = 'latin1'  # Basculer sur 'latin1' si UTF-8 échoue
             file.seek(0)
             sample = file.read(4096).decode(encoding)
         file.seek(0)
-        dialect = csv.Sniffer().sniff(sample)  # Détecter le dialecte/délimiteur
-        # Convertir le fichier en StringIO pour simuler un fichier texte, si nécessaire
         file.seek(0)
         if encoding != 'utf-8':
             file_content = file.read().decode(encoding)
@@ -49,10 +48,10 @@ def perform_ner(filtered_df, selected_column, labels_list):
     progress_bar = st.progress(0)
     progress_text = st.empty()
-    start_time = time.time()  # Enregistrer le temps de début pour le temps d'exécution total
     for index, row in enumerate(filtered_df.to_pandas().itertuples(), 1):
-        iteration_start_time = time.time()  # Temps de début pour cette itération
         if st.session_state.stop_processing:
             progress_text.text("Process stopped by the user.")
@@ -69,13 +68,13 @@ def perform_ner(filtered_df, selected_column, labels_list):
         progress = index / filtered_df.height
         progress_bar.progress(progress)
-        iteration_time = time.time() - iteration_start_time  # Calculer le temps d'exécution pour cette itération
-        total_time = time.time() - start_time  # Calculer le temps total écoulé jusqu'à présent
         progress_text.text(f"Progress: {index}/{filtered_df.height} - {progress * 100:.0f}% (Iteration: {iteration_time:.2f}s, Total: {total_time:.2f}s)")
-    end_time = time.time()  # Enregistrer le temps de fin
-    total_execution_time = end_time - start_time  # Calculer le temps d'exécution total
     progress_text.text(f"Processing complete! Total execution time: {total_execution_time:.2f}s")

+import os
 import csv
 import streamlit as st
 import polars as pl
     if file_ext.lower() in ['.xls', '.xlsx']:
         return pl.read_excel(file)
     elif file_ext.lower() == '.csv':
+        file.seek(0)  # Go back to the beginning of the file
         try:
+            sample = file.read(4096).decode('utf-8')  # Try to decode the sample in UTF-8
             encoding = 'utf-8'
         except UnicodeDecodeError:
+            encoding = 'latin1'  # Switch to 'latin1' if UTF-8 fails
             file.seek(0)
             sample = file.read(4096).decode(encoding)
         file.seek(0)
+        dialect = csv.Sniffer().sniff(sample)  # Detect the delimiter
         file.seek(0)
         if encoding != 'utf-8':
             file_content = file.read().decode(encoding)
     progress_bar = st.progress(0)
     progress_text = st.empty()
+    start_time = time.time()  # Record start time for total runtime
     for index, row in enumerate(filtered_df.to_pandas().itertuples(), 1):
+        iteration_start_time = time.time()  # Start time for this iteration
         if st.session_state.stop_processing:
             progress_text.text("Process stopped by the user.")
         progress = index / filtered_df.height
         progress_bar.progress(progress)
+        iteration_time = time.time() - iteration_start_time  # Calculate runtime for this iteration
+        total_time = time.time() - start_time  # Calculate total elapsed time so far
         progress_text.text(f"Progress: {index}/{filtered_df.height} - {progress * 100:.0f}% (Iteration: {iteration_time:.2f}s, Total: {total_time:.2f}s)")
+    end_time = time.time()  # Record end time
+    total_execution_time = end_time - start_time  # Calculate total runtime
     progress_text.text(f"Processing complete! Total execution time: {total_execution_time:.2f}s")