abdullahmubeen10 committed on
Commit
4f033e8
·
verified ·
1 Parent(s): 7187099

Update Demo.py

Browse files
Files changed (1) hide show
  1. Demo.py +28 -22
Demo.py CHANGED
@@ -2,15 +2,12 @@ import streamlit as st
2
  import sparknlp
3
  import pandas as pd
4
  import json
5
- import os
6
 
7
  from sparknlp.base import *
8
  from sparknlp.annotator import *
9
  from pyspark.ml import Pipeline
10
  from sparknlp.pretrained import PretrainedPipeline
11
 
12
- os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3.8'
13
-
14
  # Page configuration
15
  st.set_page_config(
16
  layout="wide",
@@ -40,13 +37,7 @@ st.markdown("""
40
 
41
  @st.cache_resource
42
  def init_spark():
43
- from pyspark.sql import SparkSession
44
- spark = SparkSession.builder \
45
- .appName("App") \
46
- .config("spark.pyspark.python", "/usr/bin/python3.8") \
47
- .config("spark.pyspark.driver.python", "/usr/bin/python3.8") \
48
- .getOrCreate()
49
- return spark
50
 
51
  @st.cache_resource
52
  def create_pipeline(model):
@@ -75,11 +66,20 @@ def create_pipeline(model):
75
  pipeline = Pipeline(stages=[document_assembler, sentence_detector, table_assembler, tapas_wtq, tapas_sqa])
76
  return pipeline
77
 
 
 
 
 
 
 
 
78
  def fit_data(pipeline, json_data, question):
79
  spark_df = spark.createDataFrame([[json_data, question]]).toDF("table_json", "questions")
80
  model = pipeline.fit(spark_df)
81
- result = model.transform(spark_df)
82
- return result.select("answers_wtq.result", "answers_sqa.result").collect()
 
 
83
 
84
  # Sidebar content
85
  model = st.sidebar.selectbox(
@@ -90,19 +90,19 @@ model = st.sidebar.selectbox(
90
 
91
  # Set up the page layout
92
  title = 'TAPAS for Table-Based Question Answering with Spark NLP'
93
- sub_title = (
94
- 'TAPAS (Table Parsing Supervised via Pre-trained Language Models) is a model that extends '
95
- 'the BERT architecture to handle tabular data. Unlike traditional models that require flattening '
96
- 'tables into text, TAPAS can directly interpret tables, making it a powerful tool for answering '
97
- 'questions that involve tabular data.'
98
- )
99
 
100
  st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
101
  st.markdown(f'<div class="section"><p>{sub_title}</p></div>', unsafe_allow_html=True)
102
 
103
  # Reference notebook link in sidebar
104
  link = """
105
- <a href="https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Public/15.1_Table_Question_Answering.ipynb">
106
  <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
107
  </a>
108
  """
@@ -149,7 +149,6 @@ queries = [
149
  "How many billionaires are in the conglomerate industry?"
150
  ]
151
 
152
-
153
  # Load the JSON data into a DataFrame and display it
154
  table_data = json.loads(json_data)
155
  df_table = pd.DataFrame(table_data["rows"], columns=table_data["header"])
@@ -180,5 +179,12 @@ output = fit_data(pipeline, table_json_str, text_to_analyze)
180
 
181
  # Display the output
182
  st.markdown("---")
183
- st.subheader("Processed output:")
184
- st.write("**Answer:**", ', '.join(output[0][0]))
 
 
 
 
 
 
 
 
2
  import sparknlp
3
  import pandas as pd
4
  import json
 
5
 
6
  from sparknlp.base import *
7
  from sparknlp.annotator import *
8
  from pyspark.ml import Pipeline
9
  from sparknlp.pretrained import PretrainedPipeline
10
 
 
 
11
  # Page configuration
12
  st.set_page_config(
13
  layout="wide",
 
37
 
38
  @st.cache_resource
39
  def init_spark():
40
+ return sparknlp.start()
 
 
 
 
 
 
41
 
42
  @st.cache_resource
43
  def create_pipeline(model):
 
66
  pipeline = Pipeline(stages=[document_assembler, sentence_detector, table_assembler, tapas_wtq, tapas_sqa])
67
  return pipeline
68
 
69
def fit_data(pipeline, json_data, question):
    """Run the TAPAS pipeline on one table/question pair and return the answers.

    This is the single definition of ``fit_data``. An earlier two-argument
    variant was shadowed by this one and never callable, and this variant
    passed an undefined name (``data``) to ``LightPipeline.fullAnnotate``.

    Relies on the module-level ``spark`` session, which is created outside
    this block.

    Args:
        pipeline: Unfitted Spark ML ``Pipeline`` to fit and apply.
        json_data: Table payload, stored in the ``table_json`` column
            (presumably a JSON string with ``header``/``rows`` -- confirm
            against the caller).
        question: Question text, stored in the ``questions`` column.

    Returns:
        The collected rows of ``answers_wtq.result`` and
        ``answers_sqa.result``; ``rows[0][0]`` is the first selected column
        (WTQ) and ``rows[0][1]`` the second (SQA) for the single input row.
    """
    # One-row DataFrame; the column names are assumed to match the input
    # columns configured in create_pipeline -- verify there.
    spark_df = spark.createDataFrame([[json_data, question]]).toDF("table_json", "questions")
    fitted_model = pipeline.fit(spark_df)

    # Use the DataFrame API so both inputs reach the pipeline and the result
    # stays positionally indexable (output[0][0], output[0][1]).
    answers = (
        fitted_model.transform(spark_df)
        .select("answers_wtq.result", "answers_sqa.result")
        .collect()
    )

    # NOTE(review): kept from the original so the answers stay visible while
    # the formatted display code in this file is commented out; drop it once
    # that display is re-enabled.
    st.write(answers)
    return answers
83
 
84
  # Sidebar content
85
  model = st.sidebar.selectbox(
 
90
 
91
  # Set up the page layout
92
  title = 'TAPAS for Table-Based Question Answering with Spark NLP'
93
+ sub_title = ("""
94
+ TAPAS (Table Parsing Supervised via Pre-trained Language Models) enhances the BERT architecture to effectively process tabular data, allowing it to answer complex questions about tables without needing to convert them into text.<br>
95
+ <br>
96
+ <strong>table_qa_tapas_base_finetuned_wtq:</strong> This model excels at answering questions that require aggregating data across the entire table, such as calculating sums or averages.<br>
97
+ <strong>table_qa_tapas_base_finetuned_sqa:</strong> This model is designed for sequential question-answering tasks where the answer to each question may depend on the context provided by previous answers.
98
+ """)
99
 
100
  st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
101
  st.markdown(f'<div class="section"><p>{sub_title}</p></div>', unsafe_allow_html=True)
102
 
103
  # Reference notebook link in sidebar
104
  link = """
105
+ <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/NER_HINDI_ENGLISH.ipynb">
106
  <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
107
  </a>
108
  """
 
149
  "How many billionaires are in the conglomerate industry?"
150
  ]
151
 
 
152
  # Load the JSON data into a DataFrame and display it
153
  table_data = json.loads(json_data)
154
  df_table = pd.DataFrame(table_data["rows"], columns=table_data["header"])
 
179
 
180
  # Display the output
181
  st.markdown("---")
182
+ st.subheader("Processed Output")
183
+
184
+ # # Check if output is available
185
+ # if output:
186
+ # results_wtq = output[0][0] if output[0][0] else "No results found."
187
+ # results_sqa = output[0][1] if output[0][1] else "No results found."
188
+ # st.markdown(f"**Answers from WTQ model:** {', '.join(results_wtq)}")
189
+ # st.markdown(f"**Answers from SQA model:** {', '.join(results_sqa)}")
190
+