"""Streamlit demo that exposes several NLP tasks (classification, sentiment,
NER and text generation) through Hugging Face pipelines."""

import streamlit as st
import pandas as pd
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForTokenClassification,
    AutoModelForSequenceClassification,
    AutoModelForCausalLM,
)

st.set_page_config(layout="wide")

example_list = [
    "Mustafa Kemal Atatürk 1919 yılında Samsun'a çıktı.",
    "Bugün hava çok güzel ve enerjik hissediyorum.",
    "Yapay zeka ve makine öğrenimi hakkında birçok gelişme var.",
]

st.title("NLP Toolkit")

task_list = ['Metin Sınıflandırma', 'Metin Analizi', 'Duygu Analizi', 'Metin Oluşturma', 'Varlık Tanıma']
task = st.sidebar.selectbox("Görev Seç", task_list)

st.subheader("Metin Giriş Yöntemi Seç")
# Give the radio a real label (collapsed visually) so the subheader above acts as the prompt.
input_method = st.radio(
    "Metin Giriş Yöntemi",
    ('Örneklerden Seç', 'Metin Yaz veya Yapıştır'),
    label_visibility="collapsed",
)

if input_method == 'Örneklerden Seç':
    selected_text = st.selectbox('Metin Seç', example_list)
    input_text = st.text_area("Seçilen Metin", selected_text, height=128)
elif input_method == "Metin Yaz veya Yapıştır":
    input_text = st.text_area('Metin Yaz veya Yapıştır', '', height=128)

@st.cache_resource
def load_pipeline(model_name, task_type):
    """Load and cache a Hugging Face pipeline for the selected task."""
    if task_type == "Metin Sınıflandırma":
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline('text-classification', model=model, tokenizer=tokenizer)
    elif task_type in ("Metin Analizi", "Varlık Tanıma"):
        # Both tasks run a token-classification (NER) pipeline. Note that
        # dbmdz/bert-base-turkish-cased is a base checkpoint without a fine-tuned
        # NER head, so its token-classification layer is randomly initialized.
        model = AutoModelForTokenClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline('ner', model=model, tokenizer=tokenizer)
    elif task_type == "Duygu Analizi":
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
    elif task_type == "Metin Oluşturma":
        model = AutoModelForCausalLM.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline('text-generation', model=model, tokenizer=tokenizer)
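
# st.cache_resource keys the cached pipeline on the (model_name, task_type) arguments,
# so each model/task combination is downloaded and instantiated only once per server
# process and then reused across reruns and sessions.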

model_dict = {
    "Metin Sınıflandırma": "nlptown/bert-base-multilingual-uncased-sentiment",
    "Metin Analizi": "dbmdz/bert-base-turkish-cased",
    "Duygu Analizi": "cardiffnlp/twitter-roberta-base-sentiment",
    "Metin Oluşturma": "gpt2",
    "Varlık Tanıma": "dbmdz/bert-base-turkish-cased",
}

pipeline_model = load_pipeline(model_dict[task], task)
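
# For the NER-style tasks, pipeline_model(text) returns one dict per sub-token with
# keys such as 'entity', 'score', 'word', 'start' and 'end'; the helpers below merge
# those sub-token pieces back into whole entities.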

def process_entities(entities, text):
    """Merge consecutive I- tagged sub-token pieces into consolidated entities.

    The `text` argument is currently unused.
    """
    processed_entities = []
    current_entity = None

    for entity in entities:
        if entity['entity'].startswith('I-'):
            if current_entity and current_entity['label'] == entity['entity']:
                # Same tag as the open entity: append the piece (dropping the '##'
                # sub-word marker) and extend the span.
                current_entity['word'] += entity['word'].replace('##', '')
                current_entity['end'] = entity['end']
                current_entity['score'] = max(current_entity['score'], entity['score'])
            else:
                # A different tag starts: close the previous entity and open a new one.
                if current_entity:
                    processed_entities.append(current_entity)
                current_entity = {
                    'label': entity['entity'],
                    'word': entity['word'].replace('##', ''),
                    'start': entity['start'],
                    'end': entity['end'],
                    'score': entity['score'],
                }
        else:
            # Any non-I- tag ends the currently open entity.
            if current_entity:
                processed_entities.append(current_entity)
            current_entity = None

    if current_entity:
        processed_entities.append(current_entity)

    return processed_entities
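
# Illustrative sketch of the merge (hypothetical I-PER tags; the base Turkish
# checkpoint configured above will not emit these labels without a fine-tuned NER head):
#   input : [{'entity': 'I-PER', 'word': 'Ata', ...}, {'entity': 'I-PER', 'word': '##türk', ...}]
#   output: [{'label': 'I-PER', 'word': 'Atatürk', ...}]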

if st.button("Çalıştır") and input_text:
    if task in ["Metin Sınıflandırma", "Duygu Analizi"]:
        output = pipeline_model(input_text)
        df = pd.DataFrame(output)
        st.subheader(f"{task} Sonuçları")
        st.dataframe(df)
    elif task in ["Metin Analizi", "Varlık Tanıma"]:
        output = pipeline_model(input_text)

        st.write(output)

        if len(output) > 0 and 'entity' in output[0]:
            # Keep only the fields process_entities needs; the key must remain
            # 'entity' because that is what process_entities reads.
            raw_entities = []
            for entity in output:
                raw_entities.append({
                    'word': entity['word'],
                    'entity': entity['entity'],
                    'score': entity['score'],
                    'start': entity['start'],
                    'end': entity['end'],
                })

            df = pd.DataFrame(process_entities(raw_entities, input_text))
            st.subheader("Tanımlanan Varlıklar")
            st.dataframe(df)

            def format_text(text_data, original_text):
                """Rebuild the input text as HTML with merged entities highlighted."""
                formatted_text = ""
                last_end = 0
                for item in text_data:
                    # Copy any plain text between the previous entity and this one.
                    if item['start'] > last_end:
                        formatted_text += original_text[last_end:item['start']]
                    word = item['word']
                    label = item['label']
                    score = item['score']
                    if label.startswith('I-PER'):
                        color = 'blue'
                    elif label.startswith('I-MISC'):
                        color = 'green'
                    else:
                        color = 'gray'
                    formatted_text += f"<span style='color:{color}; font-weight: bold;'>{word} ({label}, {score:.2f})</span>"
                    last_end = item['end']
                if last_end < len(original_text):
                    formatted_text += original_text[last_end:]
                return formatted_text

            formatted_text = format_text(process_entities(raw_entities, input_text), input_text)
            st.subheader("Analiz Edilen Metin")
            # unsafe_allow_html is required so the colored <span> tags are rendered.
            st.markdown(f"<p>{formatted_text}</p>", unsafe_allow_html=True)
        else:
            st.error("Varlık analizi sonucu beklenen formatta değil.")
    elif task == "Metin Oluşturma":
        output = pipeline_model(input_text, max_length=100, num_return_sequences=1)
        st.subheader("Oluşturulan Metin")
        st.write(output[0]['generated_text'])
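
# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py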