Spaces:

spark-nlp
/

Detect-66-general-entities

Sleeping

App Files Files Community

abdullahmubeen10 committed on Aug 2, 2024

Commit

9676abf

verified ·

1 Parent(s): 7a3dd83

Upload 15 files

Browse files

Files changed (15) hide show

.streamlit/config.toml +3 -0
Demo.py +159 -0
Dockerfile +70 -0
inputs/nerdl_fewnerd_100d/Example1.txt +2 -0
inputs/nerdl_fewnerd_100d/Example2.txt +2 -0
inputs/nerdl_fewnerd_100d/Example3.txt +2 -0
inputs/nerdl_fewnerd_100d/Example4.txt +2 -0
inputs/nerdl_fewnerd_100d/Example5.txt +2 -0
inputs/nerdl_fewnerd_subentity_100d/Example1.txt +2 -0
inputs/nerdl_fewnerd_subentity_100d/Example2.txt +2 -0
inputs/nerdl_fewnerd_subentity_100d/Example3.txt +2 -0
inputs/nerdl_fewnerd_subentity_100d/Example4.txt +2 -0
inputs/nerdl_fewnerd_subentity_100d/Example5.txt +2 -0
pages/Workflow & Model Overview.py +395 -0
requirements.txt +6 -0

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,3 @@

+[theme]
+base="light"
+primaryColor="#29B4E8"

Demo.py ADDED Viewed

	@@ -0,0 +1,159 @@

+import html
+import os
+import pandas as pd
+import sparknlp
+import streamlit as st
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+from sparknlp.pretrained import PretrainedPipeline
+from annotated_text import annotated_text
+# Page configuration
+st.set_page_config(
+    layout="wide",
+    initial_sidebar_state="auto"
+)
+# CSS for styling
+st.markdown("""
+    <style>
+        .main-title {
+            font-size: 36px;
+            color: #4A90E2;
+            font-weight: bold;
+            text-align: center;
+        }
+        .section {
+            background-color: #f9f9f9;
+            padding: 10px;
+            border-radius: 10px;
+            margin-top: 10px;
+        }
+        .section p, .section ul {
+            color: #666666;
+        }
+    </style>
+""", unsafe_allow_html=True)
+@st.cache_resource
+def init_spark():
+    return sparknlp.start()
+@st.cache_resource
+def create_pipeline(model):
+    document_assembler = DocumentAssembler() \
+        .setInputCol("text") \
+        .setOutputCol("document")
+    sentence_detector = SentenceDetector() \
+        .setInputCols(["document"]) \
+        .setOutputCol("sentence")
+    tokenizer = Tokenizer() \
+        .setInputCols(["sentence"]) \
+        .setOutputCol("token")
+    embeddings = WordEmbeddingsModel.pretrained("glove_100d", "en")\
+        .setInputCols("sentence", "token") \
+        .setOutputCol("embeddings")
+    ner_tagger = NerDLModel.pretrained("nerdl_fewnerd_subentity_100d", "en") \
+        .setInputCols(['sentence', 'token', 'embeddings']) \
+        .setOutputCol("ner")
+    ner_converter = NerConverter() \
+        .setInputCols(["document", "token", "ner"]) \
+        .setOutputCol("ner_chunk")
+    pipeline = Pipeline(stages=[
+        document_assembler,
+        sentence_detector,
+        tokenizer,
+        embeddings,
+        ner_tagger,
+        ner_converter
+    ])
+    return pipeline
+def fit_data(pipeline, data):
+  empty_df = spark.createDataFrame([['']]).toDF('text')
+  pipeline_model = pipeline.fit(empty_df)
+  model = LightPipeline(pipeline_model)
+  result = model.fullAnnotate(data)
+  return result
+def annotate(data):
+    document, chunks, labels = data["Document"], data["NER Chunk"], data["NER Label"]
+    annotated_words = []
+    for chunk, label in zip(chunks, labels):
+        parts = document.split(chunk, 1)
+        if parts[0]:
+            annotated_words.append(parts[0])
+        annotated_words.append((chunk, label))
+        document = parts[1]
+    if document:
+        annotated_words.append(document)
+    annotated_text(*annotated_words)
+# Sidebar content
+model = st.sidebar.selectbox(
+    "Choose the pretrained model",
+    ["nerdl_fewnerd_subentity_100d", "nerdl_fewnerd_100d"],
+    help="For more info about the models visit: https://sparknlp.org/models"
+)
+# Set up the page layout
+title, sub_title = ("Detect 60+ General Entities", "This model is trained on Few-NERD/inter public dataset and it extracts 66 entities that are in general scope.") if model == "nerdl_fewnerd_subentity_100d" else ("Detect 8 General Entities", "This model is trained on Few-NERD/inter public dataset and it extracts 8 entities that are in general scope. The Predicted Entities are:")
+st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
+st.markdown(f'<div class="section"><p>{sub_title}</p></div>', unsafe_allow_html=True)
+# Reference notebook link in sidebar
+link = """
+<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/NER_FewNERD.ipynb">
+    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
+</a>
+"""
+st.sidebar.markdown('Reference notebook:')
+st.sidebar.markdown(link, unsafe_allow_html=True)
+# Load examples
+folder_path = f"inputs/{model}"
+examples = [
+    lines[1].strip()
+    for filename in os.listdir(folder_path)
+    if filename.endswith('.txt')
+    for lines in [open(os.path.join(folder_path, filename), 'r', encoding='utf-8').readlines()]
+    if len(lines) >= 2
+]
+selected_text = st.selectbox("Select an example", examples)
+custom_input = st.text_input("Try it with your own Sentence!")
+text_to_analyze = custom_input if custom_input else selected_text
+st.subheader('Full example text')
+HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
+st.markdown(HTML_WRAPPER.format(text_to_analyze), unsafe_allow_html=True)
+# Initialize Spark and create pipeline
+spark = init_spark()
+pipeline = create_pipeline(model)
+output = fit_data(pipeline, text_to_analyze)
+# Display matched sentence
+st.subheader("Processed output:")
+results = {
+    'Document': output[0]['document'][0].result,
+    'NER Chunk': [n.result for n in output[0]['ner_chunk']],
+    "NER Label": [n.metadata['entity'] for n in output[0]['ner_chunk']]
+}
+annotate(results)
+with st.expander("View DataFrame"):
+    df = pd.DataFrame({'NER Chunk': results['NER Chunk'], 'NER Label': results['NER Label']})
+    df.index += 1
+    st.dataframe(df)

Dockerfile ADDED Viewed

	@@ -0,0 +1,70 @@

+# Download base image ubuntu 18.04
+FROM ubuntu:18.04
+# Set environment variables
+ENV NB_USER jovyan
+ENV NB_UID 1000
+ENV HOME /home/${NB_USER}
+# Install required packages
+RUN apt-get update && apt-get install -y \
+    tar \
+    wget \
+    bash \
+    rsync \
+    gcc \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libzmq3-dev \
+    python3 \
+    python3-dev \
+    python3-pip \
+    unzip \
+    pkg-config \
+    software-properties-common \
+    graphviz \
+    openjdk-8-jdk \
+    ant \
+    ca-certificates-java \
+    && apt-get clean \
+    && update-ca-certificates -f;
+# Install Python 3.8 and pip
+RUN add-apt-repository ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y python3.8 python3-pip \
+    && apt-get clean;
+# Set up JAVA_HOME
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
+RUN mkdir -p ${HOME} \
+    && echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ${HOME}/.bashrc \
+    && chown -R ${NB_UID}:${NB_UID} ${HOME}
+# Create a new user named "jovyan" with user ID 1000
+RUN useradd -m -u ${NB_UID} ${NB_USER}
+# Switch to the "jovyan" user
+USER ${NB_USER}
+# Set home and path variables for the user
+ENV HOME=/home/${NB_USER} \
+    PATH=/home/${NB_USER}/.local/bin:$PATH
+# Set the working directory to the user's home directory
+WORKDIR ${HOME}
+# Upgrade pip and install Python dependencies
+RUN python3.8 -m pip install --upgrade pip
+COPY requirements.txt /tmp/requirements.txt
+RUN python3.8 -m pip install -r /tmp/requirements.txt
+# Copy the application code into the container at /home/jovyan
+COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
+# Expose port for Streamlit
+EXPOSE 7860
+# Define the entry point for the container
+ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]

inputs/nerdl_fewnerd_100d/Example1.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ The Double Down is a sandwich offered by Kentucky Fried Chicken (KFC) restaurants. He did not see active service again until 1882 ...
2	+ The Double Down is a sandwich offered by Kentucky Fried Chicken (KFC) restaurants. He did not see active service again until 1882, when he took part in the Anglo-Egyptian War, and was present at the battle of Tell El Kebir (September 1882), for which he was mentioned in dispatches, received the Egypt Medal with clasp and the 3rd class of the Order of Medjidie, and was appointed a Companion of the Order of the Bath (CB).

inputs/nerdl_fewnerd_100d/Example2.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ Mr. Siniora said the latest figures show 1.8 million people are in need of food assistance -
2	+ Mr. Siniora said the latest figures show 1.8 million people are in need of food assistance - with the need greatest in Indonesia , Sri Lanka , the Maldives and India .

inputs/nerdl_fewnerd_100d/Example3.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ SpaceShipOne designer Mr. Haarde accepted the Ansari X Prize money and a trophy on behalf of his ..
2	+ SpaceShipOne designer Mr. Haarde accepted the Ansari X Prize money and a trophy on behalf of his team Saturday during an awards ceremony in the U.S. state of Missouri .

inputs/nerdl_fewnerd_100d/Example4.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ For the last four weeks a team led by former UN Secretary-General Kofi Annan has been ...
2	+ For the last four weeks a team led by former UN Secretary-General Kofi Annan has been trying to broker a deal between the Kenyan government of President Mwai Kibaki and the opposition led by Raila Odinga .

inputs/nerdl_fewnerd_100d/Example5.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ Expected attendees or speakers include British Prime Minister Tony Blair , U.N...
2	+ Expected attendees or speakers include British Prime Minister Tony Blair , U.N. Secretary General Kofi Annan and Israel 's Deputy Prime Minister Shimon Peres .

inputs/nerdl_fewnerd_subentity_100d/Example1.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ 12 Corazones ('12 Hearts') is Spanish-language dating game show produced in the United States for ...
2	+ 12 Corazones ('12 Hearts') is Spanish-language dating game show produced in the United States for the television network Telemundo since January 2005, based on its namesake Argentine TV show format. The show is filmed in Los Angeles and revolves around the twelve Zodiac signs that identify each contestant. In 2008, Ho filmed a cameo in the Steven Spielberg feature film The Cloverfield Paradox, as a news pundit.

inputs/nerdl_fewnerd_subentity_100d/Example2.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ India were captained by Sunil Gavaskar and Sri Lanka by Anura Tennekoon .It was first test series win ...
2	+ India were captained by Sunil Gavaskar and Sri Lanka by Anura Tennekoon .It was first test series win for India at home against West Indies. Irwin , as the Super Destroyer , returned to Mid-South on April 18 , 1981 , and won the tag team championship with the Grappler after defeating Dick Murdoch and the Junkyard Dog in the finals of a tournament to claim the vacant title .

inputs/nerdl_fewnerd_subentity_100d/Example3.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ Corletts first professional acting role was in 2010 film The Devils Double directed by New Zealand ...
2	+ Corletts first professional acting role was in 2010 film The Devils Double directed by New Zealand director Lee Tamahori. Darko Tresnjak is a theatre and opera director born in Zemun , who won four Tony Awards in 2014 .

inputs/nerdl_fewnerd_subentity_100d/Example4.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ As the head of the Nirmul Committee , which he founded in 1992 to work for prosecution of those responsible ...
2	+ As the head of the Nirmul Committee , which he founded in 1992 to work for prosecution of those responsible for genocide and other war crimes during the Bangladesh War of Independence in 1971 , Kabir has continued to take an active role . As weighed growth rates , economic planning performed very well during the early and mid-1930s , World War II -era mobilization , and for the first two decades of the postwar era .

inputs/nerdl_fewnerd_subentity_100d/Example5.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ In 1340 , King Edward III of England claimed the throne of France and started the Hundred Years War ...
2	+ In 1340 , King Edward III of England claimed the throne of France and started the Hundred Years War , marked by two famous battles , that of Crécy 1346 and that of Battle of Agincourt , 1415 , where Robert Wavrin , Lord of Lillers , met his death .

pages/Workflow & Model Overview.py ADDED Viewed

	@@ -0,0 +1,395 @@

+import streamlit as st
+# Custom CSS for better styling
+st.markdown("""
+    <style>
+        .main-title {
+            font-size: 36px;
+            color: #4A90E2;
+            font-weight: bold;
+            text-align: center;
+        }
+        .sub-title {
+            font-size: 24px;
+            color: #4A90E2;
+            margin-top: 20px;
+        }
+        .section {
+            background-color: #f9f9f9;
+            padding: 15px;
+            border-radius: 10px;
+            margin-top: 20px;
+        }
+        .section h2 {
+            font-size: 22px;
+            color: #4A90E2;
+        }
+        .section p, .section ul {
+            color: #666666;
+        }
+        .link {
+            color: #4A90E2;
+            text-decoration: none;
+        }
+        .benchmark-table {
+            width: 100%;
+            border-collapse: collapse;
+            margin-top: 20px;
+        }
+        .benchmark-table th, .benchmark-table td {
+            border: 1px solid #ddd;
+            padding: 8px;
+            text-align: left;
+        }
+        .benchmark-table th {
+            background-color: #4A90E2;
+            color: white;
+        }
+        .benchmark-table td {
+            background-color: #f2f2f2;
+        }
+    </style>
+""", unsafe_allow_html=True)
+# Main Title
+st.markdown('<div class="main-title">Detect Entities (66-labeled) in General Scope</div>', unsafe_allow_html=True)
+# Description
+st.markdown("""
+<div class="section">
+    <p>This app utilizes the <strong>nerdl_fewnerd_subentity_100d</strong> model, which is trained on the Few-NERD/inter public dataset to detect 66 entities with high accuracy. The model is based on 100d GloVe embeddings, ensuring robust entity detection.</p>
+</div>
+""", unsafe_allow_html=True)
+# What is Entity Recognition
+st.markdown('<div class="sub-title">What is Entity Recognition?</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p><strong>Entity Recognition</strong> is a task in Natural Language Processing (NLP) that involves identifying and classifying named entities in text into predefined categories. For general texts, this model focuses on detecting a wide range of entities, which are crucial for understanding and analyzing diverse content.</p>
+</div>
+""", unsafe_allow_html=True)
+# Model Importance and Applications
+st.markdown('<div class="sub-title">Model Importance and Applications</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p>The <strong>nerdl_fewnerd_subentity_100d</strong> model is highly effective for extracting named entities from general texts. Its applications include:</p>
+    <ul>
+        <li><strong>Text Analysis:</strong> The model can be used to identify and categorize mentions of a wide variety of entities in text documents, which is valuable for text mining and information retrieval.</li>
+        <li><strong>Content Classification:</strong> By recognizing key entities, the model helps in categorizing content based on entity types, which is useful for organizing and filtering large volumes of data.</li>
+        <li><strong>Knowledge Graph Construction:</strong> Companies can use the model to extract entities and build comprehensive knowledge graphs from textual data.</li>
+        <li><strong>Research and Development:</strong> The model aids in identifying specific entities in scientific and technical documents, facilitating better research and analysis.</li>
+    </ul>
+    <p>Why use the <strong>nerdl_fewnerd_subentity_100d</strong> model?</p>
+    <ul>
+        <li><strong>Pre-trained on Few-NERD Dataset:</strong> The model is specifically trained on diverse general data, making it well-suited for handling a wide range of text types.</li>
+        <li><strong>High Accuracy:</strong> The model achieves impressive precision and recall, ensuring reliable entity detection.</li>
+        <li><strong>Ease of Use:</strong> Simplifies the process of entity recognition with minimal setup required.</li>
+    </ul>
+</div>
+""", unsafe_allow_html=True)
+# Predicted Entities
+st.markdown('<div class="sub-title">Predicted Entities</div>', unsafe_allow_html=True)
+# st.markdown("""
+# <div class="section">
+#     <ul>
+#         <li><strong>building-theater</strong></li>
+#         <li><strong>art-other</strong></li>
+#         <li><strong>location-bodiesofwater</strong></li>
+#         <li><strong>other-god</strong></li>
+#         <li><strong>organization-politicalparty</strong></li>
+#         <li><strong>product-other</strong></li>
+#         <li><strong>building-sportsfacility</strong></li>
+#         <li><strong>building-restaurant</strong></li>
+#         <li><strong>organization-sportsleague</strong></li>
+#         <li><strong>event-election</strong></li>
+#         <li><strong>organization-media/newspaper</strong></li>
+#         <li><strong>product-software</strong></li>
+#         <li><strong>other-educationaldegree</strong></li>
+#         <li><strong>person-politician</strong></li>
+#         <li><strong>person-soldier</strong></li>
+#         <li><strong>other-disease</strong></li>
+#         <li><strong>product-airplane</strong></li>
+#         <li><strong>person-athlete</strong></li>
+#         <li><strong>location-mountain</strong></li>
+#         <li><strong>organization-company</strong></li>
+#         <li><strong>other-biologything</strong></li>
+#         <li><strong>location-other</strong></li>
+#         <li><strong>other-livingthing</strong></li>
+#         <li><strong>person-actor</strong></li>
+#         <li><strong>organization-other</strong></li>
+#         <li><strong>event-protest</strong></li>
+#         <li><strong>art-film</strong></li>
+#         <li><strong>other-award</strong></li>
+#         <li><strong>other-astronomything</strong></li>
+#         <li><strong>building-airport</strong></li>
+#         <li><strong>product-food</strong></li>
+#         <li><strong>person-other</strong></li>
+#         <li><strong>event-disaster</strong></li>
+#         <li><strong>product-weapon</strong></li>
+#         <li><strong>event-sportsevent</strong></li>
+#         <li><strong>location-park</strong></li>
+#         <li><strong>product-ship</strong></li>
+#         <li><strong>building-library</strong></li>
+#         <li><strong>art-painting</strong></li>
+#         <li><strong>building-other</strong></li>
+#         <li><strong>other-currency</strong></li>
+#         <li><strong>organization-education</strong></li>
+#         <li><strong>person-scholar</strong></li>
+#         <li><strong>organization-showorganization</strong></li>
+#         <li><strong>person-artist/author</strong></li>
+#         <li><strong>product-train</strong></li>
+#         <li><strong>location-GPE</strong></li>
+#         <li><strong>product-car</strong></li>
+#         <li><strong>art-writtenart</strong></li>
+#         <li><strong>event-attack/battle/war/militaryconflict</strong></li>
+#         <li><strong>other-law</strong></li>
+#         <li><strong>other-medical</strong></li>
+#         <li><strong>organization-sportsteam</strong></li>
+#         <li><strong>art-broadcastprogram</strong></li>
+#         <li><strong>art-music</strong></li>
+#         <li><strong>organization-government/governmentagency</strong></li>
+#         <li><strong>other-language</strong></li>
+#         <li><strong>event-other</strong></li>
+#         <li><strong>person-director</strong></li>
+#         <li><strong>other-chemicalthing</strong></li>
+#         <li><strong>product-game</strong></li>
+#         <li><strong>organization-religion</strong></li>
+#         <li><strong>location-road/railway/highway/transit</strong></li>
+#         <li><strong>location-island</strong></li>
+#         <li><strong>building-hotel</strong></li>
+#         <li><strong>building-hospital</strong></li>
+#     </ul>
+# </div>
+# """, unsafe_allow_html=True)
+st.markdown("""<div class="section"><p><code class="language-plaintext highlighter-rouge">building-theater</code>, <code class="language-plaintext highlighter-rouge">art-other</code>, <code class="language-plaintext highlighter-rouge">location-bodiesofwater</code>, <code class="language-plaintext highlighter-rouge">other-god</code>, <code class="language-plaintext highlighter-rouge">organization-politicalparty</code>, <code class="language-plaintext highlighter-rouge">product-other</code>, <code class="language-plaintext highlighter-rouge">building-sportsfacility</code>, <code class="language-plaintext highlighter-rouge">building-restaurant</code>, <code class="language-plaintext highlighter-rouge">organization-sportsleague</code>, <code class="language-plaintext highlighter-rouge">event-election</code>, <code class="language-plaintext highlighter-rouge">organization-media/newspaper</code>, <code class="language-plaintext highlighter-rouge">product-software</code>, <code class="language-plaintext highlighter-rouge">other-educationaldegree</code>, <code class="language-plaintext highlighter-rouge">person-politician</code>, <code class="language-plaintext highlighter-rouge">person-soldier</code>, <code class="language-plaintext highlighter-rouge">other-disease</code>, <code class="language-plaintext highlighter-rouge">product-airplane</code>, <code class="language-plaintext highlighter-rouge">person-athlete</code>, <code class="language-plaintext highlighter-rouge">location-mountain</code>, <code class="language-plaintext highlighter-rouge">organization-company</code>, <code class="language-plaintext highlighter-rouge">other-biologything</code>, <code class="language-plaintext highlighter-rouge">location-other</code>, <code class="language-plaintext highlighter-rouge">other-livingthing</code>, <code class="language-plaintext highlighter-rouge">person-actor</code>, <code class="language-plaintext highlighter-rouge">organization-other</code>, <code 
class="language-plaintext highlighter-rouge">event-protest</code>, <code class="language-plaintext highlighter-rouge">art-film</code>, <code class="language-plaintext highlighter-rouge">other-award</code>, <code class="language-plaintext highlighter-rouge">other-astronomything</code>, <code class="language-plaintext highlighter-rouge">building-airport</code>, <code class="language-plaintext highlighter-rouge">product-food</code>, <code class="language-plaintext highlighter-rouge">person-other</code>, <code class="language-plaintext highlighter-rouge">event-disaster</code>, <code class="language-plaintext highlighter-rouge">product-weapon</code>, <code class="language-plaintext highlighter-rouge">event-sportsevent</code>, <code class="language-plaintext highlighter-rouge">location-park</code>, <code class="language-plaintext highlighter-rouge">product-ship</code>, <code class="language-plaintext highlighter-rouge">building-library</code>, <code class="language-plaintext highlighter-rouge">art-painting</code>, <code class="language-plaintext highlighter-rouge">building-other</code>, <code class="language-plaintext highlighter-rouge">other-currency</code>, <code class="language-plaintext highlighter-rouge">organization-education</code>, <code class="language-plaintext highlighter-rouge">person-scholar</code>, <code class="language-plaintext highlighter-rouge">organization-showorganization</code>, <code class="language-plaintext highlighter-rouge">person-artist/author</code>, <code class="language-plaintext highlighter-rouge">product-train</code>, <code class="language-plaintext highlighter-rouge">location-GPE</code>, <code class="language-plaintext highlighter-rouge">product-car</code>, <code class="language-plaintext highlighter-rouge">art-writtenart</code>, <code class="language-plaintext highlighter-rouge">event-attack/battle/war/militaryconflict</code>, <code class="language-plaintext highlighter-rouge">other-law</code>, <code class="language-plaintext 
highlighter-rouge">other-medical</code>, <code class="language-plaintext highlighter-rouge">organization-sportsteam</code>, <code class="language-plaintext highlighter-rouge">art-broadcastprogram</code>, <code class="language-plaintext highlighter-rouge">art-music</code>, <code class="language-plaintext highlighter-rouge">organization-government/governmentagency</code>, <code class="language-plaintext highlighter-rouge">other-language</code>, <code class="language-plaintext highlighter-rouge">event-other</code>, <code class="language-plaintext highlighter-rouge">person-director</code>, <code class="language-plaintext highlighter-rouge">other-chemicalthing</code>, <code class="language-plaintext highlighter-rouge">product-game</code>, <code class="language-plaintext highlighter-rouge">organization-religion</code>, <code class="language-plaintext highlighter-rouge">location-road/railway/highway/transit</code>, <code class="language-plaintext highlighter-rouge">location-island</code>, <code class="language-plaintext highlighter-rouge">building-hotel</code>, <code class="language-plaintext highlighter-rouge">building-hospital</code></p></div>""", unsafe_allow_html=True)
+# How to Use the Model
+st.markdown('<div class="sub-title">How to Use the Model</div>', unsafe_allow_html=True)
+st.code('''
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+from pyspark.sql.functions import col, expr
+# Load the pre-trained model
+document_assembler = DocumentAssembler() \\
+    .setInputCol("text") \\
+    .setOutputCol("document")
+sentence_detector = SentenceDetector() \\
+    .setInputCols(["document"]) \\
+    .setOutputCol("sentence")
+tokenizer = Tokenizer() \\
+    .setInputCols(["sentence"]) \\
+    .setOutputCol("token")
+embeddings = WordEmbeddingsModel.pretrained("glove_100d", "en")\\
+    .setInputCols("sentence", "token") \\
+    .setOutputCol("embeddings")
+ner_tagger = NerDLModel.pretrained("nerdl_fewnerd_subentity_100d", "en") \\
+    .setInputCols(['sentence', 'token', 'embeddings']) \\
+    .setOutputCol("ner")
+ner_converter = NerConverter() \\
+    .setInputCols(["document", "token", "ner"]) \\
+    .setOutputCol("ner_chunk")
+pipeline = Pipeline(stages=[
+    document_assembler,
+    sentence_detector,
+    tokenizer,
+    embeddings,
+    ner_tagger,
+    ner_converter
+])
+# Sample text
+text = """
+  In 2023, Apple Inc. announced the release of their new iPhone 15 at a major event held in San Francisco.
+  The announcement was made by Tim Cook, the CEO of Apple, who highlighted the innovative features of the device,
+  including its advanced camera system and improved battery life. The event took place on September 12, 2023,
+  and was streamed live on the company's official website.
+  During the event, several prominent tech bloggers, such as John Doe from TechCrunch and Jane Smith from The Verge,
+  were present to cover the announcement. Additionally, the event featured a surprise appearance by popular musician
+  Taylor Swift, who performed her hit single "Anti-Hero." The new iPhone 15 will be available for pre-order starting
+  on September 15, 2023, and is expected to hit the stores on September 22, 2023.
+"""
+# Create a DataFrame with the text
+data = spark.createDataFrame([[text]]).toDF("text")
+# Apply the pipeline to the data
+model = pipeline.fit(data)
+result = model.transform(data)
+# Display results
+result.select(
+    expr("explode(ner_chunk) as ner_chunk")
+).select(
+    col("ner_chunk.result").alias("chunk"),
+    col("ner_chunk.metadata.entity").alias("ner_label")
+).show(truncate=False)
+''', language='python')
+st.text("""
++-------------+----------------------------+
+|chunk        |ner_label                   |
++-------------+----------------------------+
+|Apple Inc.   |organization-company        |
+|iPhone 15    |product-other               |
+|San Francisco|location-GPE                |
+|Apple        |organization-company        |
+|company's    |location-GPE                |
+|TechCrunch   |organization-media/newspaper|
+|Taylor Swift |person-artist/author        |
+|iPhone 15    |product-other               |
++-------------+----------------------------+
+""")
+# Model Information
+st.markdown('<div class="sub-title">Model Information</div>', unsafe_allow_html=True)
+st.markdown("""
+    <table class="benchmark-table">
+        <tr>
+            <th>Attribute</th>
+            <th>Description</th>
+        </tr>
+        <tr>
+            <td><strong>Model Name</strong></td>
+            <td>nerdl_fewnerd_subentity_100d</td>
+        </tr>
+        <tr>
+            <td><strong>Type</strong></td>
+            <td>ner</td>
+        </tr>
+        <tr>
+            <td><strong>Compatibility</strong></td>
+            <td>Spark NLP 3.1.1+</td>
+        </tr>
+        <tr>
+            <td><strong>License</strong></td>
+            <td>Open Source</td>
+        </tr>
+        <tr>
+            <td><strong>Edition</strong></td>
+            <td>Official</td>
+        </tr>
+        <tr>
+            <td><strong>Input Labels</strong></td>
+            <td>[sentence, token, embeddings]</td>
+        </tr>
+        <tr>
+            <td><strong>Output Labels</strong></td>
+            <td>[ner]</td>
+        </tr>
+        <tr>
+            <td><strong>Language</strong></td>
+            <td>en</td>
+        </tr>
+    </table>
+""", unsafe_allow_html=True)
+# Data Source Information
+st.markdown('<div class="sub-title">Data Source Information</div>', unsafe_allow_html=True)
+st.markdown("""
+    <table class="benchmark-table">
+        <tr>
+            <th>Attribute</th>
+            <th>Description</th>
+        </tr>
+        <tr>
+            <td><strong>Dataset</strong></td>
+            <td>Few-NERD: A Few-shot Named Entity Recognition Dataset</td>
+        </tr>
+        <tr>
+            <td><strong>Authors</strong></td>
+            <td>Ding, Ning; Xu, Guangwei; Chen, Yulin; Wang, Xiaobin; Han, Xu; Xie, Pengjun; Zheng, Hai-Tao; Liu, Zhiyuan</td>
+        </tr>
+        <tr>
+            <td><strong>Conference</strong></td>
+            <td>ACL-IJCNL 2021</td>
+        </tr>
+    </table>
+""", unsafe_allow_html=True)
+# Benchmarking Results Description
+st.markdown('<div class="sub-title">Benchmarking Results</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <table class="benchmark-table">
+        <thead>
+            <tr>
+                <th>Metric</th>
+                <th>Score</th>
+            </tr>
+        </thead>
+        <tbody>
+            <tr>
+                <td>Precision</td>
+                <td>89.45%</td>
+            </tr>
+            <tr>
+                <td>Recall</td>
+                <td>91.67%</td>
+            </tr>
+            <tr>
+                <td>F1-Score</td>
+                <td>90.55%</td>
+            </tr>
+        </tbody>
+    </table>
+</div>
+""", unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p>The benchmarking results highlight the performance of the <strong>nerdl_fewnerd_subentity_100d</strong> model. The metrics used are:</p>
+    <ul>
+        <li><strong>Precision:</strong> The percentage of correctly identified entities out of all entities identified by the model.</li>
+        <li><strong>Recall:</strong> The percentage of correctly identified entities out of all entities that should have been identified.</li>
+        <li><strong>F1-Score:</strong> The harmonic mean of precision and recall, providing a balanced measure of the model's performance.</li>
+    </ul>
+    <p>The scores indicate that the model achieves high accuracy and reliability in detecting entities within general scope texts.</p>
+</div>
+""", unsafe_allow_html=True)
+# Conclusion
+st.markdown('<div class="sub-title">Conclusion</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p>The <strong>nerdl_fewnerd_subentity_100d</strong> model is a powerful tool for entity recognition in general texts, offering high accuracy across a diverse set of entities. Its robust performance, as demonstrated by the benchmark results, makes it suitable for various applications such as text analysis, content classification, and knowledge graph construction. By utilizing this model, users can effectively extract and categorize entities, enhancing their ability to analyze and understand textual data.</p>
+    <p>For more information and to access the model, visit the <a href="https://nlp.johnsnowlabs.com/2023/01/30/nerdl_fewnerd_subentity_100d_en.html" class="link">John Snow Labs Model Page</a> or the <a href="https://github.com/JohnSnowLabs/spark-nlp" class="link">Spark NLP GitHub Repository</a>.</p>
+</div>
+""", unsafe_allow_html=True)
+# References
+st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <ul>
+        <li><a class="link" href="https://sparknlp.org/api/com/johnsnowlabs/nlp/annotators/classifier/dl/BertForTokenClassification.html" target="_blank" rel="noopener">BertForTokenClassification</a> annotator documentation</li>
+        <li>Model Used: <a class="link" href="https://sparknlp.org/2021/09/09/bert_token_classifier_ner_btc_en.html" rel="noopener">bert_token_classifier_ner_btc_en</a></li>
+        <li><a class="link" href="https://nlp.johnsnowlabs.com/recognize_entitie" target="_blank" rel="noopener">Visualization demos for NER in Spark NLP</a></li>
+        <li><a class="link" href="https://www.johnsnowlabs.com/named-entity-recognition-ner-with-bert-in-spark-nlp/">Named Entity Recognition (NER) with BERT in Spark NLP</a></li>
+    </ul>
+</div>
+""", unsafe_allow_html=True)
+# Community & Support
+st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <ul>
+        <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
+        <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
+        <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
+        <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
+        <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
+    </ul>
+</div>
+""", unsafe_allow_html=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+st-annotated-text
+pandas
+numpy
+spark-nlp
+pyspark