Spaces:

karalif
/

PipelineSpace

Runtime error

App Files Files Community

PipelineSpace / app.py

karalif

Update app.py

85c7334 verified about 1 year ago

raw

history blame contribute delete

5.33 kB

	import pkg_resources
	from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
	import gradio as gr
	from ferret import Benchmark

	# Load every classification checkpoint exactly once. The resulting model and
	# tokenizer objects are shared by the ferret benchmarks and by the
	# text-classification pipelines below, so no set of weights is loaded twice.
	sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
	sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
	formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
	formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
	toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
	toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
	politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
	politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")

	# ferret benchmarks used to attribute a prediction to individual tokens.
	# NOTE(review): sharing the model objects with the pipelines assumes that
	# Benchmark does not leave the model in a modified state - confirm.
	sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
	formality_bench = Benchmark(formality_model, formality_tokenizer)
	toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
	politeness_bench = Benchmark(politeness_model, politeness_tokenizer)

	# Pipelines for translation and text classification. The classifiers reuse
	# the objects loaded above instead of fetching the same checkpoints again.
	translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
	sentiment_classifier = pipeline("text-classification", model=sentiment_model, tokenizer=sentiment_tokenizer)
	formality_classifier = pipeline("text-classification", model=formality_model, tokenizer=formality_tokenizer)
	# The toxicity pipeline keeps the explicit 'bert-base-uncased' tokenizer it
	# was originally configured with; only the model weights are shared.
	detoxify_classifier = pipeline('text-classification', model=toxicity_model, tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
	politeness_classifier = pipeline("text-classification", model=politeness_model, tokenizer=politeness_tokenizer)

	def _build_byte_decoder():
	    """Return a dict mapping byte-level BPE characters back to byte values.

	    GPT-2-style byte-level BPE tokenizers render every byte as one visible
	    character: bytes 33-126, 161-172 and 174-255 map to the code point with
	    the same number, and the remaining bytes are assigned, in ascending
	    order, to the code points from 256 upwards (so the space byte 32 becomes
	    U+0120, displayed as 'Ġ'). This function builds the inverse table.
	    """
	    visible = set(range(33, 127)) | set(range(161, 173)) | set(range(174, 256))
	    decoder = {chr(byte): byte for byte in visible}
	    hidden = (byte for byte in range(256) if byte not in visible)
	    for offset, byte in enumerate(hidden):
	        decoder[chr(256 + offset)] = byte
	    return decoder


	# Built once at import time; looked up for every character of every token.
	_BYTE_DECODER = _build_byte_decoder()


	def _decode_byte_level_token(token):
	    """Decode one byte-level BPE token string into readable text."""
	    try:
	        return bytes(_BYTE_DECODER[char] for char in token).decode("utf-8")
	    except (KeyError, UnicodeDecodeError):
	        # Either the token holds a character outside the byte-level alphabet
	        # (e.g. it comes from a different kind of tokenizer), or it is only a
	        # fragment of a multi-byte character. Keep it as is, apart from the
	        # space marker that was always rewritten.
	        return token.replace("\u0120", " ")


	def replace_encoding(tokens):
	    """Turn byte-level BPE token strings into human-readable text.

	    Every token is mapped back to its underlying bytes and decoded as UTF-8,
	    so all characters (not just a hand-picked subset of Icelandic letters)
	    are restored, and 'Ġ' becomes a leading space. Tokens that cannot be
	    decoded this way are returned unchanged except for 'Ġ' -> ' '.

	    Args:
	        tokens: Sequence of token strings. The first and last elements are
	            dropped (presumably the tokenizer's special start/end tokens -
	            confirm against the caller).

	    Returns:
	        A list with one decoded string per remaining token.
	    """
	    return [_decode_byte_level_token(token) for token in tokens[1:-1]]

	def analyze_with_influence(text, bench, target=0):
	    """Summarise the Partition SHAP token attributions for ``text``.

	    Args:
	        text: The text to explain.
	        bench: Object exposing ``explain(text, target=...)`` that returns an
	            iterable of explanations, each with ``explainer``, ``tokens`` and
	            ``scores`` attributes.
	        target: Class index forwarded to ``bench.explain``. Defaults to 0,
	            the value that used to be hard-coded.

	    Returns:
	        A string of ``"token (score)"`` entries joined by ``"; "``, with the
	        scores formatted to two decimals. Empty if no explanation comes from
	        the 'Partition SHAP' explainer.
	    """
	    scored_tokens = []
	    for explanation in bench.explain(text, target=target):
	        if explanation.explainer != 'Partition SHAP':
	            continue
	        tokens = replace_encoding(explanation.tokens)
	        scores = explanation.scores
	        # replace_encoding drops the first and last token. When there is one
	        # score per raw token, trim the scores the same way; otherwise every
	        # token would be paired with the score of the token before it.
	        if len(scores) == len(explanation.tokens):
	            scores = scores[1:-1]
	        scored_tokens.extend(zip(tokens, scores))
	    return "; ".join(f"{token} ({score:.2f})" for token, score in scored_tokens)

	def analyze_text(icelandic_text):
	    """Translate Icelandic text and explain its lowest-scoring aspect.

	    The text is translated to English, scored for sentiment and formality
	    (on the Icelandic original) and politeness (on the translation), and the
	    aspect with the lowest score is passed to ``analyze_with_influence``.

	    Args:
	        icelandic_text: Text entered by the user.

	    Returns:
	        A report containing the translation, the name of the lowest-scoring
	        aspect and its influential words; or a short prompt when the input is
	        empty or only whitespace.
	    """
	    # Guard first so that blank input never reaches the models.
	    if not icelandic_text or not icelandic_text.strip():
	        return "Please enter some Icelandic text to analyze."

	    translated_text = translator(icelandic_text, max_length=512)[0]['translation_text']

	    sentiment_result = sentiment_classifier(icelandic_text)[0]
	    formality_result = formality_classifier(icelandic_text)[0]
	    # TODO: detoxify_classifier is never called; toxicity still uses a fixed
	    # placeholder score. Replacing it changes which aspect gets selected, so
	    # it is left as is until the intended scoring is decided.
	    toxicity_mock_score = 0.5
	    politeness_result = politeness_classifier(translated_text)[0]

	    # aspect name -> (score, benchmark, text that benchmark should explain).
	    # Sentiment and formality are explained on the Icelandic original,
	    # toxicity and politeness on the English translation.
	    aspects = {
	        "Sentiment": (sentiment_result['score'], sentiment_bench, icelandic_text),
	        "Formality": (formality_result['score'], formality_bench, icelandic_text),
	        "Toxicity": (toxicity_mock_score, toxicity_bench, translated_text),
	        "Politeness": (politeness_result['score'], politeness_bench, translated_text),
	    }

	    # On a tie, min() keeps the first aspect in the order listed above.
	    lowest_aspect = min(aspects, key=lambda name: aspects[name][0])
	    _, lowest_bench, text_to_explain = aspects[lowest_aspect]
	    influential_words = analyze_with_influence(text_to_explain, lowest_bench)

	    # Built from explicit pieces so that source-code indentation cannot leak
	    # into the text shown to the user; the blank-line layout is unchanged.
	    analysis_results = (
	        f"Translated Text: {translated_text}\n\n\n"
	        f"Lowest Score Aspect: {lowest_aspect}\n\n"
	        f"Influential Words in {lowest_aspect}: {influential_words}"
	    )
	    return analysis_results.strip()


	# Gradio front end: one text box for the Icelandic input and one text box
	# for the report produced by analyze_text.
	demo = gr.Interface(
	    title="Icelandic Text Analysis",
	    description="This app translates Icelandic text to English and performs analysis with influential words for the aspect with the lowest score.",
	    fn=analyze_text,
	    inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
	    outputs=gr.Textbox(label="Analysis Results"),
	)

	# Launch the interface only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()