Upload 5 files
- .streamlit/config.toml +3 -0
- Demo.py +175 -0
- Dockerfile +70 -0
- pages/Workflow & Model Overview.py +364 -0
- requirements.txt +6 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,3 @@
+[theme]
+base="light"
+primaryColor="#29B4E8"
Demo.py
ADDED
@@ -0,0 +1,175 @@
+import streamlit as st
+import sparknlp
+import pandas as pd
+import json
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+from sparknlp.pretrained import PretrainedPipeline
+
+# Page configuration
+st.set_page_config(
+    layout="wide",
+    initial_sidebar_state="auto"
+)
+
+# CSS for styling
+st.markdown("""
+    <style>
+        .main-title {
+            font-size: 36px;
+            color: #4A90E2;
+            font-weight: bold;
+            text-align: center;
+        }
+        .section {
+            background-color: #f9f9f9;
+            padding: 10px;
+            border-radius: 10px;
+            margin-top: 10px;
+        }
+        .section p, .section ul {
+            color: #666666;
+        }
+    </style>
+""", unsafe_allow_html=True)
+
+@st.cache_resource
+def init_spark():
+    return sparknlp.start()
+
+@st.cache_resource
+def create_pipeline(model):
+    document_assembler = MultiDocumentAssembler() \
+        .setInputCols("table_json", "questions") \
+        .setOutputCols("document_table", "document_questions")
+
+    sentence_detector = SentenceDetector() \
+        .setInputCols(["document_questions"]) \
+        .setOutputCol("questions")
+
+    table_assembler = TableAssembler() \
+        .setInputCols(["document_table"]) \
+        .setOutputCol("table")
+
+    tapas_wtq = TapasForQuestionAnswering \
+        .pretrained("table_qa_tapas_base_finetuned_wtq", "en") \
+        .setInputCols(["questions", "table"]) \
+        .setOutputCol("answers_wtq")
+
+    tapas_sqa = TapasForQuestionAnswering \
+        .pretrained("table_qa_tapas_base_finetuned_sqa", "en") \
+        .setInputCols(["questions", "table"]) \
+        .setOutputCol("answers_sqa")
+
+    pipeline = Pipeline(stages=[document_assembler, sentence_detector, table_assembler, tapas_wtq, tapas_sqa])
+    return pipeline
+
+def fit_data(pipeline, json_data, question):
+    spark_df = spark.createDataFrame([[json_data, question]]).toDF("table_json", "questions")
+    model = pipeline.fit(spark_df)
+    result = model.transform(spark_df)
+    return result.select("answers_wtq.result", "answers_sqa.result").collect()
+
+# Sidebar content
+model = st.sidebar.selectbox(
+    "Choose the pretrained model",
+    ["table_qa_tapas_base_finetuned_wtq", "table_qa_tapas_base_finetuned_sqa"],
+    help="For more info about the models visit: https://sparknlp.org/models"
+)
+
+# Set up the page layout
+title = 'TAPAS for Table-Based Question Answering with Spark NLP'
+sub_title = (
+    'TAPAS (Table Parsing Supervised via Pre-trained Language Models) is a model that extends '
+    'the BERT architecture to handle tabular data. Unlike traditional models that require flattening '
+    'tables into text, TAPAS can directly interpret tables, making it a powerful tool for answering '
+    'questions that involve tabular data.'
+)
+
+st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
+st.markdown(f'<div class="section"><p>{sub_title}</p></div>', unsafe_allow_html=True)
+
+# Reference notebook link in sidebar
+link = """
+<a href="https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Public/15.1_Table_Question_Answering.ipynb">
+    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
+</a>
+"""
+st.sidebar.markdown('Reference notebook:')
+st.sidebar.markdown(link, unsafe_allow_html=True)
+
+# Define the JSON table data: a header plus rows, parsed into a
+# pandas DataFrame below and passed to the pipeline as JSON
+json_data = '''
+{
+    "header": ["name", "net_worth", "age", "nationality", "company", "industry"],
+    "rows": [
+        ["Elon Musk", "$200,000,000,000", "52", "American", "Tesla, SpaceX", "Automotive, Aerospace"],
+        ["Jeff Bezos", "$150,000,000,000", "60", "American", "Amazon", "E-commerce"],
+        ["Bernard Arnault", "$210,000,000,000", "74", "French", "LVMH", "Luxury Goods"],
+        ["Bill Gates", "$120,000,000,000", "68", "American", "Microsoft", "Technology"],
+        ["Warren Buffett", "$110,000,000,000", "93", "American", "Berkshire Hathaway", "Conglomerate"],
+        ["Larry Page", "$100,000,000,000", "51", "American", "Google", "Technology"],
+        ["Mark Zuckerberg", "$85,000,000,000", "40", "American", "Meta", "Social Media"],
+        ["Mukesh Ambani", "$80,000,000,000", "67", "Indian", "Reliance Industries", "Conglomerate"],
+        ["Alice Walton", "$65,000,000,000", "74", "American", "Walmart", "Retail"],
+        ["Francoise Bettencourt Meyers", "$70,000,000,000", "70", "French", "L'Oreal", "Cosmetics"],
+        ["Amancio Ortega", "$75,000,000,000", "88", "Spanish", "Inditex (Zara)", "Retail"],
+        ["Carlos Slim", "$55,000,000,000", "84", "Mexican", "America Movil", "Telecom"]
+    ]
+}
+'''
+
+# Define queries for selection
+queries = [
+    "Who has a higher net worth, Bernard Arnault or Jeff Bezos?",
+    "List the top three individuals by net worth.",
+    "Who is the richest person in the technology industry?",
+    "Which company in the e-commerce industry has the highest net worth?",
+    "Who is the oldest billionaire on the list?",
+    "Which individual under the age of 60 has the highest net worth?",
+    "Who is the wealthiest American, and which company do they own?",
+    "Find all French billionaires and list their companies.",
+    "How many women are on the list, and what are their total net worths?",
+    "Who is the wealthiest non-American on the list?",
+    "Find the person who is the youngest and has a net worth over $100 billion.",
+    "Who owns companies in more than one industry, and what are those industries?",
+    "What is the total net worth of all individuals over 70?",
+    "How many billionaires are in the conglomerate industry?"
+]
+
+
+# Load the JSON data into a DataFrame and display it
+table_data = json.loads(json_data)
+df_table = pd.DataFrame(table_data["rows"], columns=table_data["header"])
+df_table.index += 1
+
+st.write("")
+st.write("Context DataFrame (click to edit)")
+edited_df = st.data_editor(df_table)
+
+# Convert the edited DataFrame back to JSON format
+table_json_data = {
+    "header": edited_df.columns.tolist(),
+    "rows": edited_df.values.tolist()
+}
+table_json_str = json.dumps(table_json_data)
+
+# User input for questions
+selected_text = st.selectbox("Question Query", queries)
+custom_input = st.text_input("Try it with your own Question!")
+text_to_analyze = custom_input if custom_input else selected_text
+
+# Initialize Spark and create the pipeline
+spark = init_spark()
+pipeline = create_pipeline(model)
+
+# Run the pipeline with the selected query and the converted table data
+output = fit_data(pipeline, table_json_str, text_to_analyze)
+
+# Display the answers from the model chosen in the sidebar
+st.markdown("---")
+st.subheader("Processed output:")
+st.write("**Answer:**", ', '.join(output[0][0] if model.endswith("wtq") else output[0][1]))
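
Note: a minimal way to try the demo locally, assuming Java 8+ is available on the machine (sparknlp.start() launches a local Spark session, and the two pretrained TAPAS models are downloaded on first run):

    pip install -r requirements.txt
    streamlit run Demo.py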
Dockerfile
ADDED
@@ -0,0 +1,70 @@
+# Download base image ubuntu 18.04
+FROM ubuntu:18.04
+
+# Set environment variables
+ENV NB_USER jovyan
+ENV NB_UID 1000
+ENV HOME /home/${NB_USER}
+
+# Install required packages
+RUN apt-get update && apt-get install -y \
+    tar \
+    wget \
+    bash \
+    rsync \
+    gcc \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libzmq3-dev \
+    python3 \
+    python3-dev \
+    python3-pip \
+    unzip \
+    pkg-config \
+    software-properties-common \
+    graphviz \
+    openjdk-8-jdk \
+    ant \
+    ca-certificates-java \
+    && apt-get clean \
+    && update-ca-certificates -f
+
+# Install Python 3.8 and pip
+RUN add-apt-repository ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y python3.8 python3-pip \
+    && apt-get clean
+
+# Set up JAVA_HOME
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
+RUN mkdir -p ${HOME} \
+    && echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ${HOME}/.bashrc \
+    && chown -R ${NB_UID}:${NB_UID} ${HOME}
+
+# Create a new user named "jovyan" with user ID 1000
+RUN useradd -m -u ${NB_UID} ${NB_USER}
+
+# Switch to the "jovyan" user
+USER ${NB_USER}
+
+# Set home and path variables for the user
+ENV HOME=/home/${NB_USER} \
+    PATH=/home/${NB_USER}/.local/bin:$PATH
+
+# Set the working directory to the user's home directory
+WORKDIR ${HOME}
+
+# Upgrade pip and install Python dependencies
+RUN python3.8 -m pip install --upgrade pip
+COPY requirements.txt /tmp/requirements.txt
+RUN python3.8 -m pip install -r /tmp/requirements.txt
+
+# Copy the application code into the container at /home/jovyan
+COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
+
+# Expose port for Streamlit
+EXPOSE 7860
+
+# Define the entry point for the container
+ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
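
Note: a typical build-and-run sequence for this image (the tag name is illustrative); port 7860 matches the EXPOSE and --server.port settings above:

    docker build -t tapas-table-qa .
    docker run -p 7860:7860 tapas-table-qa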
pages/Workflow & Model Overview.py
ADDED
@@ -0,0 +1,364 @@
+import streamlit as st
+import pandas as pd
+
+# Custom CSS for better styling
+st.markdown("""
+    <style>
+        .main-title {
+            font-size: 36px;
+            color: #4A90E2;
+            font-weight: bold;
+            text-align: center;
+        }
+        .sub-title {
+            font-size: 24px;
+            color: #4A90E2;
+            margin-top: 20px;
+        }
+        .section {
+            background-color: #f9f9f9;
+            padding: 15px;
+            border-radius: 10px;
+            margin-top: 20px;
+        }
+        .section p, .section ul {
+            color: #666666;
+        }
+        .link {
+            color: #4A90E2;
+            text-decoration: none;
+        }
+        h2 {
+            color: #4A90E2;
+            font-size: 28px;
+            font-weight: bold;
+            margin-top: 30px;
+        }
+        h3 {
+            color: #4A90E2;
+            font-size: 22px;
+            font-weight: bold;
+            margin-top: 20px;
+        }
+        h4 {
+            color: #4A90E2;
+            font-size: 18px;
+            font-weight: bold;
+            margin-top: 15px;
+        }
+    </style>
+""", unsafe_allow_html=True)
+
+# Main Title
+st.markdown('<div class="main-title">Question Answering Over Tables with TAPAS and Spark NLP</div>', unsafe_allow_html=True)
+
+# Overview Section
+st.markdown("""
+<div class="section">
+    <p>As data becomes increasingly complex, extracting meaningful insights from tabular data is more important than ever. TAPAS, a transformer-based model developed by Google, is designed specifically for question answering over tables. By combining TAPAS with Spark NLP, we can leverage the power of distributed computing to process large datasets efficiently.</p>
+    <p>This guide walks you through setting up TAPAS in Spark NLP, implementing two specific models (<code>table_qa_tapas_base_finetuned_wtq</code> and <code>table_qa_tapas_base_finetuned_sqa</code>), and understanding their best use cases.</p>
+</div>
+""", unsafe_allow_html=True)
+
+# Introduction to TAPAS and Spark NLP
+st.markdown('<div class="sub-title">Introduction to TAPAS and Spark NLP</div>', unsafe_allow_html=True)
+
+# What is TAPAS?
+st.markdown("""
+<div class="section">
+    <h3>What is TAPAS?</h3>
+    <p>TAPAS (Table Parsing Supervised via Pre-trained Language Models) is a model that extends the BERT architecture to handle tabular data. Unlike traditional models that require flattening tables into text, TAPAS can directly interpret tables, making it a powerful tool for answering questions that involve tabular data.</p>
+</div>
+""", unsafe_allow_html=True)
+
+# Why Use TAPAS with Spark NLP?
+st.markdown("""
+<div class="section">
+    <h3>Why Use TAPAS with Spark NLP?</h3>
+    <p>Spark NLP, developed by John Snow Labs, is an open-source library that provides state-of-the-art natural language processing capabilities within a distributed computing framework. Integrating TAPAS with Spark NLP lets you scale your question-answering tasks across large datasets, making it ideal for big data environments.</p>
+</div>
+""", unsafe_allow_html=True)
+
+# Pipeline and Results
+st.markdown('<div class="sub-title">Pipeline and Results</div>', unsafe_allow_html=True)
+
+st.markdown("""
+<div class="section">
+    <p>In this section, we'll build a pipeline using Spark NLP to process a table and answer questions about the data it contains. We will use two different TAPAS models, each suited to a different type of query.</p>
+</div>
+""", unsafe_allow_html=True)
+
+# Step 1: Creating the Data
+st.markdown("""
+<div class="section">
+    <h4>Step 1: Creating the Data</h4>
+    <p>We'll start by creating a Spark DataFrame that includes a table in JSON format and a set of questions.</p></div>
+""", unsafe_allow_html=True)
+
+st.code("""
+json_data = '''
+{
+    "header": ["name", "money", "age"],
+    "rows": [
+        ["Donald Trump", "$100,000,000", "75"],
+        ["Elon Musk", "$20,000,000,000,000", "55"]
+    ]
+}
+'''
+
+queries = [
+    "Who earns less than 200,000,000?",
+    "Who earns 100,000,000?",
+    "How much money has Donald Trump?",
+    "How old are they?",
+    "How much money have they total?",
+    "Who earns more than Donald Trump?"
+]
+
+data = spark.createDataFrame([[json_data, " ".join(queries)]])\\
+    .toDF("table_json", "questions")
+""", language="python")
+
+# Step 2: Assembling the Pipeline
+st.markdown("""
+<div class="section">
+    <h4>Step 2: Assembling the Pipeline</h4>
+    <p>We will now set up a Spark NLP pipeline that includes the necessary annotators for processing the table and questions.</p></div>
+""", unsafe_allow_html=True)
+
+st.code("""
+from sparknlp.annotator import TapasForQuestionAnswering, SentenceDetector
+from sparknlp.base import MultiDocumentAssembler, TableAssembler
+from pyspark.ml import Pipeline
+from pyspark.sql import functions as F
+
+# Step 1: Transform raw texts into `document` annotations
+document_assembler = MultiDocumentAssembler() \\
+    .setInputCols("table_json", "questions") \\
+    .setOutputCols("document_table", "document_questions")
+
+# Step 2: Split the question document into sentences
+sentence_detector = SentenceDetector() \\
+    .setInputCols(["document_questions"]) \\
+    .setOutputCol("questions")
+
+# Step 3: Parse the JSON document into a table annotation
+table_assembler = TableAssembler() \\
+    .setInputCols(["document_table"]) \\
+    .setOutputCol("table")
+
+# WTQ TAPAS model
+tapas_wtq = TapasForQuestionAnswering \\
+    .pretrained("table_qa_tapas_base_finetuned_wtq", "en") \\
+    .setInputCols(["questions", "table"]) \\
+    .setOutputCol("answers_wtq")
+
+# SQA TAPAS model
+tapas_sqa = TapasForQuestionAnswering \\
+    .pretrained("table_qa_tapas_base_finetuned_sqa", "en") \\
+    .setInputCols(["questions", "table"]) \\
+    .setOutputCol("answers_sqa")
+
+# Define pipeline
+pipeline = Pipeline(stages=[
+    document_assembler,
+    sentence_detector,
+    table_assembler,
+    tapas_wtq,
+    tapas_sqa
+])
+
+# Fit and transform data
+model = pipeline.fit(data)
+result = model.transform(data)
+""", language="python")
+
+# Step 3: Viewing the Results
+st.markdown("""
+<div class="section">
+    <h4>Step 3: Viewing the Results</h4>
+    <p>After processing, we can explore the results generated by each model:</p></div>
+""", unsafe_allow_html=True)
+
+st.code("""
+# WTQ Model Results:
+result.select(F.explode(result.answers_wtq)).show(truncate=False)
+""", language="python")
+
+st.text("""
++--------------------------------------+
+|col                                   |
++--------------------------------------+
+|Donald Trump                          |
+|Donald Trump                          |
+|SUM($100,000,000)                     |
+|AVERAGE(75, 55)                       |
+|SUM($100,000,000, $20,000,000,000,000)|
+|Elon Musk                             |
++--------------------------------------+
+""")
+
+st.code("""
+# SQA Model Results:
+result.select(F.explode(result.answers_sqa)).show(truncate=False)
+""", language="python")
+
+st.text("""
++---------------------------------+
+|col                              |
++---------------------------------+
+|Donald Trump                     |
+|Donald Trump                     |
+|$100,000,000                     |
+|75, 55                           |
+|$100,000,000, $20,000,000,000,000|
+|Elon Musk                        |
++---------------------------------+
+""")
+
+# Comparing Results
+st.markdown("""
+<div class="section">
+    <h4>Comparing Results</h4>
+    <p>To better understand the differences, we can compare the results from both models side by side:</p></div>
+""", unsafe_allow_html=True)
+
+st.code("""
+result.select(F.explode(F.arrays_zip(result.questions.result,
+                                     result.answers_sqa.result,
+                                     result.answers_wtq.result)).alias("cols"))\\
+    .select(F.expr("cols['0']").alias("question"),
+            F.expr("cols['1']").alias("answer_sqa"),
+            F.expr("cols['2']").alias("answer_wtq")).show(truncate=False)
+""", language="python")
+
+st.text("""
++---------------------------------+---------------------------------+--------------------------------------+
+|question                         |answer_sqa                       |answer_wtq                            |
++---------------------------------+---------------------------------+--------------------------------------+
+|Who earns less than 200,000,000? |Donald Trump                     |Donald Trump                          |
+|Who earns 100,000,000?           |Donald Trump                     |Donald Trump                          |
+|How much money has Donald Trump? |$100,000,000                     |SUM($100,000,000)                     |
+|How old are they?                |75, 55                           |AVERAGE(75, 55)                       |
+|How much money have they total?  |$100,000,000, $20,000,000,000,000|SUM($100,000,000, $20,000,000,000,000)|
+|Who earns more than Donald Trump?|Elon Musk                        |Elon Musk                             |
++---------------------------------+---------------------------------+--------------------------------------+
+""")
+
+# One-Liner Alternative
+st.markdown("""
+<div class="section">
+    <h4>One-Liner Alternative</h4>
+    <p>For those who prefer a simpler approach, John Snow Labs offers a one-liner API to quickly get answers using TAPAS models.</p></div>
+""", unsafe_allow_html=True)
+
+st.code("""
+# Download the johnsnowlabs library
+pip install johnsnowlabs
+""", language="bash")
+
+st.code("""
+import pandas as pd
+from johnsnowlabs import nlp
+
+# Create the context DataFrame
+context_df = pd.DataFrame({
+    'name': ['Donald Trump', 'Elon Musk'],
+    'money': ['$100,000,000', '$20,000,000,000,000'],
+    'age': ['75', '55']
+})
+
+# Define the questions
+questions = [
+    "Who earns less than 200,000,000?",
+    "Who earns 100,000,000?",
+    "How much money has Donald Trump?",
+    "How old are they?",
+    "How much money have they total?",
+    "Who earns more than Donald Trump?"
+]
+
+# Combine context and questions into a tuple
+tapas_data = (context_df, questions)
+
+# Use the one-liner API with the WTQ model
+answers_wtq = nlp.load('en.answer_question.tapas.wtq.large_finetuned').predict(tapas_data)
+answers_wtq[['sentence', 'tapas_qa_UNIQUE_answer']]
+""", language="python")
+
+# Sample output of the one-liner API, shown as a static table
+data = {
+    "sentence": [
+        "Who earns less than 200,000,000?",
+        "Who earns 100,000,000?",
+        "How much money has Donald Trump?",
+        "How old are they?",
+        "How much money have they total? Who earns more..."
+    ],
+    "tapas_qa_UNIQUE_answer": [
+        "Donald Trump",
+        "Donald Trump",
+        "SUM($100,000,000)",
+        "SUM(55)",
+        "SUM($20,000,000,000,000)"
+    ]
+}
+st.dataframe(pd.DataFrame(data))
+
+# Model Information and Use Cases
+st.markdown("""
+<div class="section">
+    <h4>Model Information and Use Cases</h4>
+    <p>Understanding the strengths of each TAPAS model can help you choose the right tool for your task.</p>
+    <ul>
+        <li><b>table_qa_tapas_base_finetuned_wtq</b>
+        <ul>
+            <li>Best for: answering questions involving table-wide aggregation (e.g., sums, averages).</li>
+        </ul></li>
+        <li><b>table_qa_tapas_base_finetuned_sqa</b>
+        <ul>
+            <li>Best for: answering questions in a sequential question-answering context, where the current question depends on previous answers.</li>
+        </ul></li>
+    </ul>
+</div>
+""", unsafe_allow_html=True)
+
+# Conclusion
+st.markdown("""
+<div class="section">
+    <h4>Conclusion</h4>
+    <p>TAPAS, integrated with Spark NLP, provides a powerful solution for question answering over tables, capable of handling both complex aggregation queries and straightforward Q&A tasks. Whether you're working with large datasets or simple tables, TAPAS offers flexibility and scalability. The <code>table_qa_tapas_base_finetuned_wtq</code> model excels at aggregation tasks, while <code>table_qa_tapas_base_finetuned_sqa</code> is best for direct, sequential question answering.</p>
+    <p>By following this guide, you can implement TAPAS in your own projects, leveraging Spark NLP's distributed processing capabilities to extract insights from your data.</p>
+</div>
+""", unsafe_allow_html=True)
+
+# References
+st.markdown("""
+<div class="section">
+    <h4>References</h4>
+    <ul>
+        <li>Documentation: <a class="link" href="https://nlp.johnsnowlabs.com/docs/en/annotators#multidocumentassembler" target="_blank" rel="noopener">MultiDocumentAssembler</a>, <a class="link" href="https://nlp.johnsnowlabs.com/docs/en/annotators#TapasForQuestionAnswering" target="_blank" rel="noopener">TapasForQuestionAnswering</a></li>
+        <li>Python Docs: <a class="link" href="https://nlp.johnsnowlabs.com/api/python/reference/autosummary/sparknlp/base/multi_document_assembler/index.html#sparknlp.base.multi_document_assembler.MultiDocumentAssembler.setIdCol" target="_blank" rel="noopener">MultiDocumentAssembler</a>, <a class="link" href="https://nlp.johnsnowlabs.com/api/python/reference/autosummary/sparknlp/annotator/classifier_dl/tapas_for_question_answering/index.html" target="_blank" rel="noopener">TapasForQuestionAnswering</a></li>
+        <li>Scala Docs: <a class="link" href="https://nlp.johnsnowlabs.com/api/com/johnsnowlabs/nlp/MultiDocumentAssembler.html" target="_blank" rel="noopener">MultiDocumentAssembler</a>, <a class="link" href="https://nlp.johnsnowlabs.com/api/com/johnsnowlabs/nlp/annotators/classifier/dl/TapasForQuestionAnswering.html" target="_blank" rel="noopener">TapasForQuestionAnswering</a></li>
+        <li>Models Used: <a class="link" href="https://sparknlp.org/2022/09/30/table_qa_tapas_base_finetuned_wtq_en.html" target="_blank" rel="noopener">table_qa_tapas_base_finetuned_wtq</a>, <a class="link" href="https://sparknlp.org/2022/09/30/table_qa_tapas_base_finetuned_sqa_en.html" target="_blank" rel="noopener">table_qa_tapas_base_finetuned_sqa</a></li>
+        <li>For extended usage examples, see the notebooks for <a class="link" href="https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/text/english/document-assembler/Loading_Multiple_Documents.ipynb" target="_blank" rel="noopener">MultiDocumentAssembler</a> and <a class="link" href="https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Public/15.1_Table_Question_Answering.ipynb" target="_blank" rel="noopener">TapasForQuestionAnswering</a>.</li>
+        <li><a href="https://arxiv.org/abs/2004.02349" class="link" target="_blank">TAPAS: Weakly Supervised Table Parsing via Pre-training</a></li>
+        <li><a href="https://nlp.johnsnowlabs.com/" class="link" target="_blank">Spark NLP Documentation</a></li>
+        <li><a href="https://nlp.johnsnowlabs.com/models" class="link" target="_blank">John Snow Labs Models Hub</a></li>
+    </ul>
+</div>
+""", unsafe_allow_html=True)
+
+# Community & Support
+st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <ul>
+        <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
+        <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
+        <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
+        <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
+        <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
+    </ul>
+</div>
+""", unsafe_allow_html=True)
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+streamlit
+st-annotated-text
+pandas
+numpy
+spark-nlp
+pyspark
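
Note: these dependencies are unpinned, so pip resolves the latest releases; spark-nlp and pyspark must be a compatible pair. One pairing assumed to work at the time of writing (the exact versions are an assumption; verify against the Spark NLP compatibility notes):

    python3.8 -m pip install spark-nlp==5.1.4 pyspark==3.4.1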