Spaces:

oliviercaron
/

GLiNER_file

Sleeping

App Files Files Community

oliviercaron committed on Oct 17, 2024

Commit

9d9b743

verified ·

1 Parent(s): c949917

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -173

app.py DELETED Viewed

@@ -1,173 +0,0 @@
-import os
-import csv
-import streamlit as st
-import polars as pl
-from io import BytesIO, StringIO
-from gliner import GLiNER
-from gliner_file import run_ner
-import time
-st.set_page_config(
-    page_title="GliNER", page_icon="🧊", layout="wide", initial_sidebar_state="expanded"
-)
-# Modified function to load data from either an Excel or CSV file
-@st.cache_data
-def load_data(file):
-    _, file_ext = os.path.splitext(file.name)
-    if file_ext.lower() in [".xls", ".xlsx"]:
-        return pl.read_excel(file)
-    elif file_ext.lower() == ".csv":
-        file.seek(0)  # Go back to the beginning of the file
-        try:
-            sample = file.read(4096).decode(
-                "utf-8"
-            )  # Try to decode the sample in UTF-8
-            encoding = "utf-8"
-        except UnicodeDecodeError:
-            encoding = "latin1"  # Switch to 'latin1' if UTF-8 fails
-            file.seek(0)
-            sample = file.read(4096).decode(encoding)
-        file.seek(0)
-        dialect = csv.Sniffer().sniff(sample)  # Detect the delimiter
-        file.seek(0)
-        if encoding != "utf-8":
-            file_content = file.read().decode(encoding)
-            file = StringIO(file_content)
-        else:
-            file_content = file.read().decode("utf-8")
-            file = StringIO(file_content)
-        return pl.read_csv(
-            file,
-            separator=dialect.delimiter,
-            truncate_ragged_lines=True,
-            ignore_errors=True,
-        )
-    else:
-        raise ValueError("The uploaded file must be a CSV or Excel file.")
-# Function to perform NER and update the UI
-def perform_ner(filtered_df, selected_column, labels_list):
-    ner_results_dict = {label: [] for label in labels_list}
-    progress_bar = st.progress(0)
-    progress_text = st.empty()
-    start_time = time.time()  # Record start time for total runtime
-    for index, row in enumerate(filtered_df.to_pandas().itertuples(), 1):
-        iteration_start_time = time.time()  # Start time for this iteration
-        if st.session_state.stop_processing:
-            progress_text.text("Process stopped by the user.")
-            break
-        text_to_analyze = getattr(row, selected_column)
-        ner_results = run_ner(
-            st.session_state.gliner_model, text_to_analyze, labels_list
-        )
-        for label in labels_list:
-            texts = ner_results.get(label, [])
-            concatenated_texts = ", ".join(texts)
-            ner_results_dict[label].append(concatenated_texts)
-        progress = index / filtered_df.height
-        progress_bar.progress(progress)
-        iteration_time = (
-            time.time() - iteration_start_time
-        )  # Calculate runtime for this iteration
-        total_time = time.time() - start_time  # Calculate total elapsed time so far
-        progress_text.text(
-            f"Progress: {index}/{filtered_df.height} - {progress * 100:.0f}% (Iteration: {iteration_time:.2f}s, Total: {total_time:.2f}s)"
-        )
-    end_time = time.time()  # Record end time
-    total_execution_time = end_time - start_time  # Calculate total runtime
-    progress_text.text(
-        f"Processing complete! Total execution time: {total_execution_time:.2f}s"
-    )
-    for label, texts in ner_results_dict.items():
-        filtered_df = filtered_df.with_columns(pl.Series(name=label, values=texts))
-    return filtered_df
-def main():
-    st.title("Online NER with GliNER")
-    st.markdown("Prototype v0.1")
-    # Ensure the stop_processing flag is initialized
-    if "stop_processing" not in st.session_state:
-        st.session_state.stop_processing = False
-    uploaded_file = st.sidebar.file_uploader("Choose a file")
-    if uploaded_file is None:
-        st.warning("Please upload a file.")
-        return
-    try:
-        df = load_data(uploaded_file)
-    except ValueError as e:
-        st.error(str(e))
-        return
-    selected_column = st.selectbox("Select the column for NER:", df.columns, index=0)
-    filter_text = st.text_input("Filter column by input text", "")
-    ner_labels = st.text_input(
-        "Enter all your different labels, separated by a comma", ""
-    )
-    filtered_df = (
-        df.filter(pl.col(selected_column).str.contains(f"(?i).*{filter_text}.*"))
-        if filter_text
-        else df
-    )
-    st.dataframe(filtered_df)
-    if st.button("Start NER"):
-        if not ner_labels:
-            st.warning("Please enter some labels for NER.")
-        else:
-            # Load GLiNER model if not already loaded
-            if "gliner_model" not in st.session_state:
-                with st.spinner("Loading GLiNER model... Please wait."):
-                    st.session_state.gliner_model = GLiNER.from_pretrained(
-                        "urchade/gliner_largev2"
-                    )
-                    st.session_state.gliner_model.eval()
-            labels_list = ner_labels.split(",")
-            updated_df = perform_ner(filtered_df, selected_column, labels_list)
-            st.dataframe(updated_df)
-            def to_excel(df):
-                output = BytesIO()
-                df.to_pandas().to_excel(output, index=False, engine="openpyxl")
-                return output.getvalue()
-            df_excel = to_excel(updated_df)
-            st.download_button(
-                label="📥 Download Excel",
-                data=df_excel,
-                file_name="ner_results.xlsx",
-                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-            )
-    st.button(
-        "Stop Processing",
-        on_click=lambda: setattr(st.session_state, "stop_processing", True),
-    )
-# Launch the app only when this file is executed as a script.
-if __name__ == "__main__":
-    main()