File size: 1,265 Bytes
c15480b
8ff3ba1
94327b4
 
7abeff1
94327b4
 
 
 
4579865
7abeff1
 
8ff3ba1
 
 
7abeff1
 
8ff3ba1
c15480b
94327b4
7abeff1
c15480b
8ff3ba1
c15480b
 
94327b4
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import streamlit as st
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
from sparknlp_display import Display
from pyspark.sql import SparkSession

# Create the Spark session used to build the results DataFrame.
# getOrCreate() hands back the already-running session when one exists,
# so this line is cheap on Streamlit reruns.
spark = (
    SparkSession.builder
    .appName("NER Analysis")
    .getOrCreate()
)

# Hugging Face Hub identifier of the NER model.
model_name = "Nucha/Nucha_SkillNER_BERT"


@st.cache_resource
def _load_ner_components(name):
    """Load the tokenizer, model and NER pipeline for ``name`` once.

    Streamlit re-executes this script from top to bottom on every widget
    interaction; caching the loaded objects keeps the weights from being
    re-read on each rerun.

    Args:
        name: Hugging Face Hub model identifier.

    Returns:
        A ``(tokenizer, model, pipeline)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForTokenClassification.from_pretrained(name)
    # aggregation_strategy="simple" asks the pipeline to group tokens into
    # entities instead of returning one result per token.
    loaded_pipeline = pipeline(
        "ner",
        model=loaded_model,
        tokenizer=loaded_tokenizer,
        aggregation_strategy="simple",
    )
    return loaded_tokenizer, loaded_model, loaded_pipeline


# Keep the original module-level names available to the rest of the script.
tokenizer, model, ner_pipeline = _load_ner_components(model_name)

# Streamlit UI: a title, a text box, and an "Analyze" button that runs NER
# on the entered text and renders the detected entities.
st.title("NER Analysis with Nucha SkillNER BERT and Spark NLP Display")

text = st.text_area("Enter text for NER analysis:")

if st.button("Analyze"):
    if not text or not text.strip():
        # Nothing to tag; skip the model call and the Spark round-trip.
        st.warning("Please enter some text to analyze.")
    else:
        ner_results = ner_pipeline(text)

        # Build one row per detected entity. When the pipeline aggregates
        # tokens, the label is reported under "entity_group"; "entity" is
        # only present for non-aggregated output, so fall back to it rather
        # than raising KeyError.
        data = [
            {
                "word": entity["word"],
                "start": entity["start"],
                "end": entity["end"],
                "label": entity.get("entity_group", entity.get("entity")),
            }
            for entity in ner_results
        ]

        if not data:
            # Spark cannot infer a schema from an empty list, so report the
            # empty result instead of calling createDataFrame.
            st.info("No entities were found in the text.")
        else:
            ner_df = spark.createDataFrame(data)

            # Render the entities with sparknlp_display.
            # NOTE(review): confirm that sparknlp_display exposes `Display`
            # and that display() accepts (DataFrame, word column, label
            # column) - not verifiable from this file.
            display = Display()
            st.write(display.display(ner_df, "word", "label"))