"""Streamlit demo that exposes several NLP tasks (classification, sentiment,
NER and text generation) through Hugging Face pipelines."""

import streamlit as st
import pandas as pd
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForTokenClassification,
    AutoModelForSequenceClassification,
    AutoModelForCausalLM,
)

st.set_page_config(layout="wide")

example_list = [
    "Mustafa Kemal Atatürk 1919 yılında Samsun'a çıktı.",
    "Bugün hava çok güzel ve enerjik hissediyorum.",
    "Yapay zeka ve makine öğrenimi hakkında birçok gelişme var.",
]

st.title("NLP Toolkit")

task_list = ['Metin Sınıflandırma', 'Metin Analizi', 'Duygu Analizi', 'Metin Oluşturma', 'Varlık Tanıma']
task = st.sidebar.selectbox("Görev Seç", task_list)

st.subheader("Metin Giriş Yöntemi Seç")
# Give the radio a real label (collapsed visually) so the subheader above acts as the prompt.
input_method = st.radio(
    "Metin Giriş Yöntemi",
    ('Örneklerden Seç', 'Metin Yaz veya Yapıştır'),
    label_visibility="collapsed",
)

if input_method == 'Örneklerden Seç':
    selected_text = st.selectbox('Metin Seç', example_list)
    input_text = st.text_area("Seçilen Metin", selected_text, height=128)
elif input_method == "Metin Yaz veya Yapıştır":
    input_text = st.text_area('Metin Yaz veya Yapıştır', '', height=128)

@st.cache_resource
def load_pipeline(model_name, task_type):
    """Load and cache a Hugging Face pipeline for the selected task."""
    if task_type == "Metin Sınıflandırma":
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline('text-classification', model=model, tokenizer=tokenizer)
    elif task_type in ("Metin Analizi", "Varlık Tanıma"):
        # Both tasks run a token-classification (NER) pipeline. Note that
        # dbmdz/bert-base-turkish-cased is a base checkpoint without a fine-tuned
        # NER head, so its token-classification layer is randomly initialized.
        model = AutoModelForTokenClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline('ner', model=model, tokenizer=tokenizer)
    elif task_type == "Duygu Analizi":
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
    elif task_type == "Metin Oluşturma":
        model = AutoModelForCausalLM.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline('text-generation', model=model, tokenizer=tokenizer)
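
# st.cache_resource keys the cached pipeline on the (model_name, task_type) arguments,
# so each model/task combination is downloaded and instantiated only once per server
# process and then reused across reruns and sessions.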

model_dict = {
    "Metin Sınıflandırma": "nlptown/bert-base-multilingual-uncased-sentiment",
    "Metin Analizi": "dbmdz/bert-base-turkish-cased",
    "Duygu Analizi": "cardiffnlp/twitter-roberta-base-sentiment",
    "Metin Oluşturma": "gpt2",
    "Varlık Tanıma": "dbmdz/bert-base-turkish-cased",
}

pipeline_model = load_pipeline(model_dict[task], task)
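
# For the NER-style tasks, pipeline_model(text) returns one dict per sub-token with
# keys such as 'entity', 'score', 'word', 'start' and 'end'; the helpers below merge
# those sub-token pieces back into whole entities.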

def process_entities(entities, text):
    """Merge consecutive I- tagged sub-token pieces into consolidated entities.

    The `text` argument is currently unused.
    """
    processed_entities = []
    current_entity = None

    for entity in entities:
        if entity['entity'].startswith('I-'):
            if current_entity and current_entity['label'] == entity['entity']:
                # Same tag as the open entity: append the piece (dropping the '##'
                # sub-word marker) and extend the span.
                current_entity['word'] += entity['word'].replace('##', '')
                current_entity['end'] = entity['end']
                current_entity['score'] = max(current_entity['score'], entity['score'])
            else:
                # A different tag starts: close the previous entity and open a new one.
                if current_entity:
                    processed_entities.append(current_entity)
                current_entity = {
                    'label': entity['entity'],
                    'word': entity['word'].replace('##', ''),
                    'start': entity['start'],
                    'end': entity['end'],
                    'score': entity['score'],
                }
        else:
            # Any non-I- tag ends the currently open entity.
            if current_entity:
                processed_entities.append(current_entity)
            current_entity = None

    if current_entity:
        processed_entities.append(current_entity)

    return processed_entities
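
# Illustrative sketch of the merge (hypothetical I-PER tags; the base Turkish
# checkpoint configured above will not emit these labels without a fine-tuned NER head):
#   input : [{'entity': 'I-PER', 'word': 'Ata', ...}, {'entity': 'I-PER', 'word': '##türk', ...}]
#   output: [{'label': 'I-PER', 'word': 'Atatürk', ...}]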

if st.button("Çalıştır") and input_text:
    if task in ["Metin Sınıflandırma", "Duygu Analizi"]:
        output = pipeline_model(input_text)
        df = pd.DataFrame(output)
        st.subheader(f"{task} Sonuçları")
        st.dataframe(df)
    elif task in ["Metin Analizi", "Varlık Tanıma"]:
        output = pipeline_model(input_text)

        st.write(output)

        if len(output) > 0 and 'entity' in output[0]:
            # Keep only the fields process_entities needs; the key must remain
            # 'entity' because that is what process_entities reads.
            raw_entities = []
            for entity in output:
                raw_entities.append({
                    'word': entity['word'],
                    'entity': entity['entity'],
                    'score': entity['score'],
                    'start': entity['start'],
                    'end': entity['end'],
                })

            df = pd.DataFrame(process_entities(raw_entities, input_text))
            st.subheader("Tanımlanan Varlıklar")
            st.dataframe(df)

            def format_text(text_data, original_text):
                """Rebuild the input text as HTML with merged entities highlighted."""
                formatted_text = ""
                last_end = 0
                for item in text_data:
                    # Copy any plain text between the previous entity and this one.
                    if item['start'] > last_end:
                        formatted_text += original_text[last_end:item['start']]
                    word = item['word']
                    label = item['label']
                    score = item['score']
                    if label.startswith('I-PER'):
                        color = 'blue'
                    elif label.startswith('I-MISC'):
                        color = 'green'
                    else:
                        color = 'gray'
                    formatted_text += f"<span style='color:{color}; font-weight: bold;'>{word} ({label}, {score:.2f})</span>"
                    last_end = item['end']
                if last_end < len(original_text):
                    formatted_text += original_text[last_end:]
                return formatted_text

            formatted_text = format_text(process_entities(raw_entities, input_text), input_text)
            st.subheader("Analiz Edilen Metin")
            # unsafe_allow_html is required so the colored <span> tags are rendered.
            st.markdown(f"<p>{formatted_text}</p>", unsafe_allow_html=True)
        else:
            st.error("Varlık analizi sonucu beklenen formatta değil.")
    elif task == "Metin Oluşturma":
        output = pipeline_model(input_text, max_length=100, num_return_sequences=1)
        st.subheader("Oluşturulan Metin")
        st.write(output[0]['generated_text'])
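
# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py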