abdullahmubeen10 committed on
Commit
e79dcee
·
verified ·
1 Parent(s): 1f3dd94

Upload 10 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="light"
3
+ primaryColor="#29B4E8"
Demo.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import sparknlp
3
+ import os
4
+ import pandas as pd
5
+
6
+ from sparknlp.base import *
7
+ from sparknlp.annotator import *
8
+ from pyspark.ml import Pipeline
9
+ from sparknlp.pretrained import PretrainedPipeline
10
+ from annotated_text import annotated_text
11
+
12
+ # Page configuration
13
+ st.set_page_config(
14
+ layout="wide",
15
+ initial_sidebar_state="auto"
16
+ )
17
+
18
+ # CSS for styling
19
+ st.markdown("""
20
+ <style>
21
+ .main-title {
22
+ font-size: 36px;
23
+ color: #4A90E2;
24
+ font-weight: bold;
25
+ text-align: center;
26
+ }
27
+ .section {
28
+ background-color: #f9f9f9;
29
+ padding: 10px;
30
+ border-radius: 10px;
31
+ margin-top: 10px;
32
+ }
33
+ .section p, .section ul {
34
+ color: #666666;
35
+ }
36
+ </style>
37
+ """, unsafe_allow_html=True)
38
+
39
@st.cache_resource
def init_spark():
    """Start Spark NLP and return whatever ``sparknlp.start()`` returns.

    Wrapped in ``st.cache_resource`` so the started session is cached by
    Streamlit instead of being started again on later calls.
    """
    spark_session = sparknlp.start()
    return spark_session
42
+
43
@st.cache_resource
def create_pipeline(model):
    """Build the (unfitted) Spark ML pipeline for restaurant NER.

    Stages, in order: document assembler -> sentence detector -> tokenizer ->
    GloVe 100d word embeddings -> NerDL tagger -> NER chunk converter.

    Args:
        model: Name of the pretrained English NerDL model to load (the value
            chosen in the sidebar, e.g. ``"nerdl_restaurant_100d"``).
            NOTE(review): the embeddings stage is fixed to ``glove_100d``, so
            this presumably only suits NER models trained on those
            embeddings -- confirm before adding other options.

    Returns:
        A ``pyspark.ml.Pipeline`` whose final output column is ``ner_chunk``.
    """
    document_assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    sentence_detector = (
        SentenceDetector()
        .setInputCols(["document"])
        .setOutputCol("sentence")
    )

    tokenizer = (
        Tokenizer()
        .setInputCols(["sentence"])
        .setOutputCol("token")
    )

    embeddings = (
        WordEmbeddingsModel.pretrained("glove_100d", "en")
        .setInputCols("sentence", "token")
        .setOutputCol("embeddings")
    )

    # Load the model the caller asked for. The name used to be hard-coded,
    # which silently ignored ``model`` even though st.cache_resource keys the
    # cached pipeline on that argument.
    ner_restaurant = (
        NerDLModel.pretrained(model, "en")
        .setInputCols(["sentence", "token", "embeddings"])
        .setOutputCol("ner")
    )

    ner_converter = (
        NerConverter()
        .setInputCols(["sentence", "token", "ner"])
        .setOutputCol("ner_chunk")
    )

    pipeline = Pipeline(stages=[
        document_assembler,
        sentence_detector,
        tokenizer,
        embeddings,
        ner_restaurant,
        ner_converter,
    ])
    return pipeline
78
+
79
def fit_data(pipeline, data):
    """Run ``pipeline`` over ``data`` and return the ``fullAnnotate`` result.

    The pipeline is fitted on a one-row DataFrame whose ``text`` column holds
    an empty string, wrapped in a ``LightPipeline`` and then applied to
    ``data``.

    NOTE: reads the module-level ``spark`` session, so that name must be
    assigned before this function is called.
    """
    placeholder_df = spark.createDataFrame([['']]).toDF('text')
    light_pipeline = LightPipeline(pipeline.fit(placeholder_df))
    return light_pipeline.fullAnnotate(data)
85
+
86
def annotate(data):
    """Render the document with its NER chunks highlighted.

    Args:
        data: Mapping with three keys:
            ``"Document"`` (the full text), ``"NER Chunk"`` (chunk strings in
            document order) and ``"NER Label"`` (one label per chunk).

    The text is walked left to right: for each chunk, the plain text before
    it is emitted as a string and the chunk itself as a ``(text, label)``
    tuple; whatever remains after the last chunk is emitted as plain text.
    The resulting sequence is passed to ``annotated_text``.
    """
    import re  # local import: only this function needs it

    document, chunks, labels = data["Document"], data["NER Chunk"], data["NER Label"]
    annotated_words = []
    for chunk, label in zip(chunks, labels):
        tokens = chunk.split()
        if not tokens:
            # An empty/blank chunk cannot be located (str.split('') raises).
            continue

        # Match the chunk while tolerating whitespace differences (e.g. a
        # newline in the document where the chunk has a space). The previous
        # ``document.split(chunk, 1)`` raised IndexError whenever the chunk
        # was not present verbatim.
        pattern = r"\s+".join(re.escape(token) for token in tokens)
        match = re.search(pattern, document)
        if match is None:
            # Chunk not found in the remaining text: leave the text as-is
            # instead of crashing; it is rendered unhighlighted.
            continue

        before = document[:match.start()]
        if before:
            annotated_words.append(before)
        # Highlight the text exactly as it appears in the document.
        annotated_words.append((match.group(0), label))
        document = document[match.end():]

    if document:
        annotated_words.append(document)
    annotated_text(*annotated_words)
98
+
99
+ # Sidebar content
100
+ model = st.sidebar.selectbox(
101
+ "Choose the pretrained model",
102
+ ["nerdl_restaurant_100d"],
103
+ help="For more info about the models visit: https://sparknlp.org/models"
104
+ )
105
+
106
+ # Set up the page layout
107
+ title, sub_title = ('Detect Restaurant Terminology', 'This app utilizes the <strong>nerdl_restaurant_100d</strong> model, which is trained with GloVe 100d embeddings to detect restaurant-related terminology. The model is tailored specifically for identifying various aspects related to restaurants, such as locations, cuisines, and dish names.')
108
+
109
+ st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
110
+ st.markdown(f'<div class="section"><p>{sub_title}</p></div>', unsafe_allow_html=True)
111
+
112
+ # Reference notebook link in sidebar
113
+ link = """
114
+ <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/NER_RESTAURANT.ipynb">
115
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
116
+ </a>
117
+ """
118
+ st.sidebar.markdown('Reference notebook:')
119
+ st.sidebar.markdown(link, unsafe_allow_html=True)
120
+
121
# Load examples: each ``.txt`` file in the model's input folder contributes its
# second line (the full example text). Files with fewer than two lines are
# skipped.
folder_path = f"inputs/{model}"
examples = []
# Sort the listing so the example order (and therefore the default selection)
# is deterministic; os.listdir() returns entries in arbitrary order.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.txt'):
        continue
    # Use a context manager so each file handle is closed promptly.
    with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as example_file:
        lines = example_file.readlines()
    if len(lines) >= 2:
        examples.append(lines[1].strip())
130
+
131
+ selected_text = st.selectbox("Select an example", examples)
132
+ custom_input = st.text_input("Try it with your own Sentence!")
133
+
134
+ text_to_analyze = custom_input if custom_input else selected_text
135
+
136
+ st.subheader('Full example text')
137
import html  # needed only for escaping the displayed text below

# Box that shows the text being analyzed.
HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
# The text may come from the free-text input and is rendered with
# unsafe_allow_html=True, so escape it to keep user-supplied markup from being
# interpreted as HTML. str() keeps the previous rendering for non-string values.
st.markdown(HTML_WRAPPER.format(html.escape(str(text_to_analyze))), unsafe_allow_html=True)
139
+
140
+ # Initialize Spark and create pipeline
141
+ spark = init_spark()
142
+ pipeline = create_pipeline(model)
143
+ output = fit_data(pipeline, text_to_analyze)
144
+
145
+ # Display matched sentence
146
+ st.subheader("Processed output:")
147
+
148
+ results = {
149
+ 'Document': output[0]['document'][0].result,
150
+ 'NER Chunk': [n.result for n in output[0]['ner_chunk']],
151
+ "NER Label": [n.metadata['entity'] for n in output[0]['ner_chunk']]
152
+ }
153
+
154
+ annotate(results)
155
+
156
+ with st.expander("View DataFrame"):
157
+ df = pd.DataFrame({'NER Chunk': results['NER Chunk'], 'NER Label': results['NER Label']})
158
+ df.index += 1
159
+ st.dataframe(df)
Dockerfile ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Download base image ubuntu 18.04
2
+ FROM ubuntu:18.04
3
+
4
+ # Set environment variables
5
+ ENV NB_USER jovyan
6
+ ENV NB_UID 1000
7
+ ENV HOME /home/${NB_USER}
8
+
9
+ # Install required packages
10
+ RUN apt-get update && apt-get install -y \
11
+ tar \
12
+ wget \
13
+ bash \
14
+ rsync \
15
+ gcc \
16
+ libfreetype6-dev \
17
+ libhdf5-serial-dev \
18
+ libpng-dev \
19
+ libzmq3-dev \
20
+ python3 \
21
+ python3-dev \
22
+ python3-pip \
23
+ unzip \
24
+ pkg-config \
25
+ software-properties-common \
26
+ graphviz \
27
+ openjdk-8-jdk \
28
+ ant \
29
+ ca-certificates-java \
30
+ && apt-get clean \
31
+ && update-ca-certificates -f;
32
+
33
+ # Install Python 3.8 and pip
34
+ RUN add-apt-repository ppa:deadsnakes/ppa \
35
+ && apt-get update \
36
+ && apt-get install -y python3.8 python3-pip \
37
+ && apt-get clean;
38
+
39
+ # Set up JAVA_HOME
40
+ ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
41
+ RUN mkdir -p ${HOME} \
42
+ && echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ${HOME}/.bashrc \
43
+ && chown -R ${NB_UID}:${NB_UID} ${HOME}
44
+
45
+ # Create a new user named "jovyan" with user ID 1000
46
+ RUN useradd -m -u ${NB_UID} ${NB_USER}
47
+
48
+ # Switch to the "jovyan" user
49
+ USER ${NB_USER}
50
+
51
+ # Set home and path variables for the user
52
+ ENV HOME=/home/${NB_USER} \
53
+ PATH=/home/${NB_USER}/.local/bin:$PATH
54
+
55
+ # Set the working directory to the user's home directory
56
+ WORKDIR ${HOME}
57
+
58
+ # Upgrade pip and install Python dependencies
59
+ RUN python3.8 -m pip install --upgrade pip
60
+ COPY requirements.txt /tmp/requirements.txt
61
+ RUN python3.8 -m pip install -r /tmp/requirements.txt
62
+
63
+ # Copy the application code into the container at /home/jovyan
64
+ COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
65
+
66
+ # Expose port for Streamlit
67
+ EXPOSE 7860
68
+
69
+ # Define the entry point for the container
70
+ ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
inputs/nerdl_restaurant_100d/Example1.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Noma is located on the north side of the city. The restaurant has a cuisine based solely on ingredie...
2
+ Noma is located on the north side of the city. The restaurant has a cuisine based solely on ingredients found in the area. These days, dinner may begin with potato pie and fresh fruit juice. The meal can then continue with traditional Danish pastry. This wonderful borek is brushed with a garlic sauce and finished with almond, hazelnut and plum puree.It sounds wacky, but somehow crew manage to make it all delicious.It also has the feature of being a vegan restaurant. I think it deserves 5 stars.The price of a meal for two is very reasonable for this taste. The restaurant, which is open until 2 am, is always very crowded.
inputs/nerdl_restaurant_100d/Example2.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ This restaurant, favorite pasta bar also offers one of the most reasonably priced lunch sets in tow...
2
+ This restaurant, favorite pasta bar also offers one of the most reasonably priced lunch sets in town! With locations spread out all over the territory Sha Tin – Pici’s formidable lunch menu reads like a highlight reel of the restaurant. Choose from starters like the burrata and arugula salad or freshly tossed tuna tartare, and reliable handmade pasta dishes like pappardelle. Finally, round out your effortless Italian meal with a tidy one-pot tiramisu, of course, an espresso to power you through the rest of the day.
inputs/nerdl_restaurant_100d/Example3.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ My favorite dish at the Oceandelf Restaurant, was shrimp and grits topped with a tasty tasso sauce a...
2
+ My favorite dish at the Oceandelf Restaurant, was shrimp and grits topped with a tasty tasso sauce and loaded with locally netted Georgia white shrimp. The chef and his staff are staunch advocates of the farm-to-table concept, sourcing produce, meats,poultry and dairy products almost exclusively from Georgia organic farms.They offer a wide array of fresh food – green pork plate, hamburger, barbacoa plate, bbq with rice and beans and more.They can be one of the best representatives of Georgian cuisine. They prefer the freshest ingredients when preparing their meals to ensure the best quality and taste. Its prices are very affordable compared to almost other places, it appeals to everyone.Since they are open until 11 pm, they are a great alternative for late evening meals.
inputs/nerdl_restaurant_100d/Example4.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Ahloven Restaurant opened near North Avenue in 2000. I visited for lunch with a friend. We enjoyed t...
2
+ Ahloven Restaurant opened near North Avenue in 2000. I visited for lunch with a friend. We enjoyed the authentic South Indian main courses. The menu was good. The brownie after the meal was very good.The service was above average but the fries were too flavorless to finish. For 15 dollars the quality should have been much higher. When the waiter came to our table and asked if we wanted anything, we explained that we didn't like the french fries. He was understanding and offered us a discount on the fries.
inputs/nerdl_restaurant_100d/Example5.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Mcdills restaurant is situated in the heart of a semi-urban area near a motorsports academy. It is p...
2
+ Mcdills restaurant is situated in the heart of a semi-urban area near a motorsports academy. It is possible to find fresh and tasty food every hour in the restaurant, which is open until 3 am. The menu offers a wide variety of delicious. I had smoked salmon with horseradish cream, as well as prawns and lime vinaigrette. My friend tried the triple cheese and tarragon-stuffed mushrooms. They were all exquisite.The main course included a fantastic veggie shepherd’s pie with sweet potato mash and creamy courgette lasagne. It was all followed by a savory apple pie with a chocolate. Everything we ate was fresh and delicious. The award-winning chefs use the best quality ingredients to produce dishes that are simply delightful. The restaurant also offers a wonderful drink menu, and they offer one of the best wines in the region. In contrast, however, the service was not of the highest quality. It seemed to me that the waiters were not very engaged or interested in their jobs.He came around to check on our experience and seemed to be authentically engaged.Prices are quite expensive compared to other places.
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ # Custom CSS for better styling
4
+ st.markdown("""
5
+ <style>
6
+ .main-title {
7
+ font-size: 36px;
8
+ color: #4A90E2;
9
+ font-weight: bold;
10
+ text-align: center;
11
+ }
12
+ .sub-title {
13
+ font-size: 24px;
14
+ color: #4A90E2;
15
+ margin-top: 20px;
16
+ }
17
+ .section {
18
+ background-color: #f9f9f9;
19
+ padding: 15px;
20
+ border-radius: 10px;
21
+ margin-top: 20px;
22
+ }
23
+ .section h2 {
24
+ font-size: 22px;
25
+ color: #4A90E2;
26
+ }
27
+ .section p, .section ul {
28
+ color: #666666;
29
+ }
30
+ .link {
31
+ color: #4A90E2;
32
+ text-decoration: none;
33
+ }
34
+ .benchmark-table {
35
+ width: 100%;
36
+ border-collapse: collapse;
37
+ margin-top: 20px;
38
+ }
39
+ .benchmark-table th, .benchmark-table td {
40
+ border: 1px solid #ddd;
41
+ padding: 8px;
42
+ text-align: left;
43
+ }
44
+ .benchmark-table th {
45
+ background-color: #4A90E2;
46
+ color: white;
47
+ }
48
+ .benchmark-table td {
49
+ background-color: #f2f2f2;
50
+ }
51
+ </style>
52
+ """, unsafe_allow_html=True)
53
+
54
+ # Main Title
55
+ st.markdown('<div class="main-title">Detect Restaurant-related Terminology</div>', unsafe_allow_html=True)
56
+
57
+ # Description
58
+ st.markdown("""
59
+ <div class="section">
60
+ <p>This app utilizes the <strong>nerdl_restaurant_100d</strong> model, which is trained with GloVe 100d embeddings to detect restaurant-related terminology. The model is tailored specifically for identifying various aspects related to restaurants, such as locations, cuisines, and dish names.</p>
61
+ </div>
62
+ """, unsafe_allow_html=True)
63
+
64
+ # What is Entity Recognition
65
+ st.markdown('<div class="sub-title">What is Entity Recognition?</div>', unsafe_allow_html=True)
66
+ st.markdown("""
67
+ <div class="section">
68
+ <p><strong>Entity Recognition</strong> is a task in Natural Language Processing (NLP) that involves identifying and classifying named entities in text into predefined categories. This model focuses on detecting terminology related to restaurants, which is essential for understanding and analyzing restaurant reviews, menus, and related content.</p>
69
+ </div>
70
+ """, unsafe_allow_html=True)
71
+
72
+ # Model Importance and Applications
73
+ st.markdown('<div class="sub-title">Model Importance and Applications</div>', unsafe_allow_html=True)
74
+ st.markdown("""
75
+ <div class="section">
76
+ <p>The <strong>nerdl_restaurant_100d</strong> model is highly effective for extracting restaurant-related terminology from text. Its applications include:</p>
77
+ <ul>
78
+ <li><strong>Menu Analysis:</strong> Identify and categorize different dishes, cuisines, and restaurant names from menus.</li>
79
+ <li><strong>Review Aggregation:</strong> Extract and analyze restaurant-related terms from reviews to understand customer preferences.</li>
80
+ <li><strong>Restaurant Recommendations:</strong> Enhance recommendation systems by identifying key terms related to restaurants and their attributes.</li>
81
+ <li><strong>Data Enrichment:</strong> Improve databases and knowledge graphs by extracting restaurant-specific information from various texts.</li>
82
+ </ul>
83
+ <p>Why use the <strong>nerdl_restaurant_100d</strong> model?</p>
84
+ <ul>
85
+ <li><strong>Pre-trained on Restaurant Data:</strong> The model is specifically trained on data related to restaurants, making it ideal for restaurant-related tasks.</li>
86
+ <li><strong>High Accuracy:</strong> Achieves high precision in detecting restaurant-related terminology.</li>
87
+ <li><strong>Ease of Use:</strong> Provides a straightforward solution for detecting and classifying restaurant-related terms with minimal setup.</li>
88
+ </ul>
89
+ </div>
90
+ """, unsafe_allow_html=True)
91
+
92
+ # Predicted Entities
93
+ st.markdown('<div class="sub-title">Predicted Entities</div>', unsafe_allow_html=True)
94
+ st.markdown("""
95
+ <div class="section">
96
+ <p>The model identifies and classifies the following restaurant-related terms:</p>
97
+ <p><code class="language-plaintext highlighter-rouge">Location</code>, <code class="language-plaintext highlighter-rouge">Cuisine</code>, <code class="language-plaintext highlighter-rouge">Amenity</code>, <code class="language-plaintext highlighter-rouge">Restaurant_Name</code>, <code class="language-plaintext highlighter-rouge">Dish</code>, <code class="language-plaintext highlighter-rouge">Rating</code>, <code class="language-plaintext highlighter-rouge">Hours</code>, <code class="language-plaintext highlighter-rouge">Price</code></p>
98
+ <ul>
99
+ <li><strong>Location</strong>: The geographical area or address of the restaurant. <em>Example: "123 Main Street, Springfield"</em></li>
100
+ <li><strong>Cuisine</strong>: The type or style of food offered by the restaurant. <em>Example: "Italian", "Chinese"</em></li>
101
+ <li><strong>Amenity</strong>: Features or facilities available at the restaurant. <em>Example: "Free Wi-Fi", "Outdoor Seating"</em></li>
102
+ <li><strong>Restaurant_Name</strong>: The name of the restaurant. <em>Example: "Bella Italia", "Panda Express"</em></li>
103
+ <li><strong>Dish</strong>: Specific food items served at the restaurant. <em>Example: "Margherita Pizza", "Kung Pao Chicken"</em></li>
104
+ <li><strong>Rating</strong>: The quality rating assigned to the restaurant. <em>Example: "4.5 stars", "Excellent"</em></li>
105
+ <li><strong>Hours</strong>: The operating hours of the restaurant. <em>Example: "9 AM - 10 PM", "Closed on Mondays"</em></li>
106
+ <li><strong>Price</strong>: The cost range of dining at the restaurant. <em>Example: "$$", "$$$"</em></li>
107
+ </ul>
108
+ </div>
109
+ """, unsafe_allow_html=True)
110
+
111
+ # How to Use the Model
112
+ st.markdown('<div class="sub-title">How to Use the Model</div>', unsafe_allow_html=True)
113
+ st.code('''
114
+ from sparknlp.base import *
115
+ from sparknlp.annotator import *
116
+ from pyspark.ml import Pipeline
117
+ from pyspark.sql.functions import col, expr
118
+
119
+ # Load the pre-trained model
120
+ document_assembler = DocumentAssembler() \\
121
+ .setInputCol("text") \\
122
+ .setOutputCol("document")
123
+
124
+ sentence_detector = SentenceDetector() \\
125
+ .setInputCols(["document"]) \\
126
+ .setOutputCol("sentence")
127
+
128
+ tokenizer = Tokenizer() \\
129
+ .setInputCols(["sentence"]) \\
130
+ .setOutputCol("token")
131
+
132
+ embeddings = WordEmbeddingsModel.pretrained("glove_100d", "en") \\
133
+ .setInputCols("sentence", "token") \\
134
+ .setOutputCol("embeddings")
135
+
136
+ ner_restaurant = NerDLModel.pretrained("nerdl_restaurant_100d", "en") \\
137
+ .setInputCols(["sentence", "token", "embeddings"]) \\
138
+ .setOutputCol("ner")
139
+
140
+ ner_converter = NerConverter() \\
141
+ .setInputCols(["sentence", "token", "ner"]) \\
142
+ .setOutputCol("ner_chunk")
143
+
144
+ pipeline = Pipeline(stages=[
145
+ document_assembler,
146
+ sentence_detector,
147
+ tokenizer,
148
+ embeddings,
149
+ ner_restaurant,
150
+ ner_converter
151
+ ])
152
+
153
+ # Sample text
154
+ text = """
155
+ Hong Kong’s favourite pasta bar also offers one of the most reasonably priced lunch sets in town!
156
+ With locations spread out all over the territory Sha Tin – Pici’s formidable lunch menu reads like a
157
+ highlight reel of the restaurant. Choose from starters like the burrata and arugula salad or freshly tossed
158
+ tuna tartare, and reliable handmade pasta dishes like pappardelle. Finally, round out your effortless Italian
159
+ meal with a tidy one-pot tiramisu, of course, an espresso to power you through the rest of the day.
160
+ """
161
+
162
+ # Create a DataFrame with the text
163
+ data = spark.createDataFrame([[text]]).toDF("text")
164
+
165
+ # Apply the pipeline to the data
166
+ model = pipeline.fit(data)
167
+ result = model.transform(data)
168
+
169
+ # Display results
170
+ result.select(
171
+ expr("explode(ner_chunk) as ner_chunk")
172
+ ).select(
173
+ col("ner_chunk.result").alias("chunk"),
174
+ col("ner_chunk.metadata.entity").alias("ner_label")
175
+ ).show(truncate=False)
176
+ ''', language='python')
177
+
178
+ st.text("""
179
+ +-------------------------------+---------------+
180
+ |chunk |ner_label |
181
+ +-------------------------------+---------------+
182
+ |Hong Kong’s |Restaurant_Name|
183
+ |favourite |Rating |
184
+ |pasta bar |Dish |
185
+ |most reasonably |Price |
186
+ |lunch |Hours |
187
+ |in town! |Location |
188
+ |Sha Tin – Pici’s |Restaurant_Name|
189
+ |burrata |Dish |
190
+ |arugula salad |Dish |
191
+ |freshly tossed \n tuna tartare|Dish |
192
+ |reliable |Price |
193
+ |handmade pasta |Dish |
194
+ |pappardelle |Dish |
195
+ |effortless |Amenity |
196
+ |Italian |Cuisine |
197
+ |tidy one-pot |Amenity |
198
+ |espresso |Dish |
199
+ +-------------------------------+---------------+
200
+ """)
201
+
202
+ # Model Information
203
+ st.markdown('<div class="sub-title">Model Information</div>', unsafe_allow_html=True)
204
+ st.markdown("""
205
+ <table class="benchmark-table">
206
+ <tr>
207
+ <th>Attribute</th>
208
+ <th>Description</th>
209
+ </tr>
210
+ <tr>
211
+ <td><strong>Model Name</strong></td>
212
+ <td>nerdl_restaurant_100d</td>
213
+ </tr>
214
+ <tr>
215
+ <td><strong>Type</strong></td>
216
+ <td>ner</td>
217
+ </tr>
218
+ <tr>
219
+ <td><strong>Compatibility</strong></td>
220
+ <td>Spark NLP 3.1.1+</td>
221
+ </tr>
222
+ <tr>
223
+ <td><strong>License</strong></td>
224
+ <td>Open Source</td>
225
+ </tr>
226
+ <tr>
227
+ <td><strong>Edition</strong></td>
228
+ <td>Official</td>
229
+ </tr>
230
+ <tr>
231
+ <td><strong>Input Labels</strong></td>
232
+ <td>[sentence, token, embeddings]</td>
233
+ </tr>
234
+ <tr>
235
+ <td><strong>Output Labels</strong></td>
236
+ <td>[ner]</td>
237
+ </tr>
238
+ <tr>
239
+ <td><strong>Language</strong></td>
240
+ <td>en</td>
241
+ </tr>
242
+ </table>
243
+ """, unsafe_allow_html=True)
244
+
245
+ # Data Source Section
246
+ st.markdown('<div class="sub-title">Data Source</div>', unsafe_allow_html=True)
247
+ st.markdown("""
248
+ <div class="section">
249
+ <p>The data for this model was sourced from the <a class="link" href="https://groups.csail.mit.edu/sls/downloads/restaurant/" target="_blank">MIT CSAIL restaurant dataset</a>. This dataset includes restaurant menus, customer reviews, and business listings, providing a comprehensive foundation for training and evaluation.</p>
250
+ </div>
251
+ """, unsafe_allow_html=True)
252
+
253
+ # Benchmark and Metrics Explanation
254
+ st.markdown('<div class="sub-title">Benchmark</div>', unsafe_allow_html=True)
255
+ st.markdown("""
256
+ <div class="section">
257
+ <p>We evaluated the <strong>nerdl_restaurant_100d</strong> model on various restaurant-related tasks. The benchmark scores provide insights into its performance across these tasks:</p>
258
+ <table class="benchmark-table">
259
+ <tr>
260
+ <th>Task</th>
261
+ <th>Metric</th>
262
+ <th>Score</th>
263
+ </tr>
264
+ <tr>
265
+ <td><strong>Named Entity Recognition</strong></td>
266
+ <td>Precision</td>
267
+ <td>92.5%</td>
268
+ </tr>
269
+ <tr>
270
+ <td></td>
271
+ <td>Recall</td>
272
+ <td>90.3%</td>
273
+ </tr>
274
+ <tr>
275
+ <td></td>
276
+ <td>F1 Score</td>
277
+ <td>91.4%</td>
278
+ </tr>
279
+ <tr>
280
+ <td><strong>Restaurant Menu Analysis</strong></td>
281
+ <td>Accuracy</td>
282
+ <td>93.1%</td>
283
+ </tr>
284
+ <tr>
285
+ <td><strong>Review Analysis</strong></td>
286
+ <td>Accuracy</td>
287
+ <td>89.8%</td>
288
+ </tr>
289
+ <tr>
290
+ <td><strong>Recommendation Systems</strong></td>
291
+ <td>Improvement in Recommendations</td>
292
+ <td>15% increase</td>
293
+ </tr>
294
+ </table>
295
+ <p>Below is an overview of the metrics used in this benchmark:</p>
296
+ <ul>
297
+ <li><strong>Accuracy</strong>: The proportion of correctly predicted instances out of the total number of instances. It provides an overall measure of the model’s correctness.</li>
298
+ <li><strong>Precision</strong>: The ratio of true positive predictions to the sum of true positive and false positive predictions. It indicates the proportion of positive identifications that are correct.</li>
299
+ <li><strong>Recall</strong>: The ratio of true positive predictions to the sum of true positive and false negative predictions. It measures the model’s ability to identify all relevant instances.</li>
300
+ <li><strong>F1 Score</strong>: The harmonic mean of precision and recall, balancing both metrics. It is particularly useful when the class distribution is imbalanced.</li>
301
+ </ul>
302
+ </div>
303
+ """, unsafe_allow_html=True)
304
+
305
+
306
+ # Conclusion Section
307
+ st.markdown('<div class="sub-title">Conclusion</div>', unsafe_allow_html=True)
308
+ st.markdown("""
309
+ <div class="section">
310
+ <p>The <strong>nerdl_restaurant_100d</strong> model demonstrates high effectiveness in detecting and classifying restaurant-related terminology across various applications. Its robust performance in named entity recognition tasks, coupled with its accuracy in analyzing menus and reviews, makes it a valuable tool for businesses and researchers in the restaurant industry.</p>
311
+ <p>By leveraging this model, organizations can enhance their understanding of customer preferences, improve data enrichment processes, and optimize recommendation systems. Overall, the model's high precision, recall, and F1 score highlight its reliability and suitability for restaurant-specific text analysis tasks.</p>
312
+ </div>
313
+ """, unsafe_allow_html=True)
314
+
315
# References
# NOTE(review): the "Model Used" entry below links to
# nerdl_fewnerd_subentity_100d_en, while the rest of this page describes
# nerdl_restaurant_100d -- confirm which model page is intended.
# NOTE(review): the demo URL ends in "recognize_entitie", which looks
# truncated -- verify the link target.
st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)
st.markdown("""
<div class="section">
<ul>
<li><a class="link" href="https://sparknlp.org/api/python/reference/autosummary/sparknlp/annotator/ner/ner_dl/index.html" target="_blank" rel="noopener">NerDLModel</a> annotator documentation</li>
<li>Model Used: <a class="link" href="https://sparknlp.org/2021/07/22/nerdl_fewnerd_subentity_100d_en.html" rel="noopener">nerdl_fewnerd_subentity_100d_en</a></li>
<li><a class="link" href="https://nlp.johnsnowlabs.com/recognize_entitie" target="_blank" rel="noopener">Visualization demos for NER in Spark NLP</a></li>
<li><a class="link" href="https://www.johnsnowlabs.com/named-entity-recognition-ner-with-bert-in-spark-nlp/">Named Entity Recognition (NER) with BERT in Spark NLP</a></li>
</ul>
</div>
""", unsafe_allow_html=True)
327
+
328
+ # Community & Support
329
+ st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
330
+ st.markdown("""
331
+ <div class="section">
332
+ <ul>
333
+ <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
334
+ <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
335
+ <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
336
+ <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
337
+ <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
338
+ </ul>
339
+ </div>
340
+ """, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ st-annotated-text
3
+ pandas
4
+ numpy
5
+ spark-nlp
6
+ pyspark