import pkg_resources
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import gradio as gr
from ferret import Benchmark
# Load models and tokenizers
sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
# Initialize benchmarks
sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
formality_bench = Benchmark(formality_model, formality_tokenizer)
toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
# Initialize pipelines for translation and text classification
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
sentiment_classifier = pipeline("text-classification", model="Birkir/electra-base-igc-is-sentiment-analysis")
formality_classifier = pipeline("text-classification", model="svanhvit/formality-classification-icebert")
detoxify_classifier = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
politeness_classifier = pipeline("text-classification", model="Genius1237/xlm-roberta-large-tydip")
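# Sentiment and formality are scored on the original Icelandic text, while toxicity and
# politeness are scored on its English translation (see analyze_text below).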
def replace_encoding(tokens):
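    """Map byte-level BPE artifacts (e.g. 'Ġ' for a leading space) back to readable
    Icelandic characters and drop the special tokens at either end of the sequence."""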
    return [token.replace('Ġ', ' ')
                 .replace('Ã°', 'ð')
                 .replace('Ã©', 'é')
                 .replace('Ã¦', 'æ')
                 .replace('Ã½', 'ý')
                 .replace('Ã¡', 'á')
                 .replace('Ãº', 'ú')
                 .replace('ÃŃ', 'í')
                 .replace('Ã¶', 'ö')
                 .replace('Ã¾', 'þ')
                 .replace('Ãģ', 'Á')
                 .replace('Ãį', 'Í')
                 .replace('Ãĵ', 'Ó')
                 .replace('ÃĨ', 'Æ')
                 .replace('ÃĲ', 'Ð')
                 .replace('Ãĸ', 'Ö')
                 .replace('Ãī', 'É')
                 .replace('Ãļ', 'Ú')
            for token in tokens[1:-1]]
def analyze_with_influence(text, bench):
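    """Explain the prediction for class index 0 (target=0) with ferret and return the
    Partition SHAP token attributions formatted as 'token (score); token (score); ...'."""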
explanations = bench.explain(text, target=0)
influential_words = []
for explanation in explanations:
if explanation.explainer == 'Partition SHAP':
tokens = replace_encoding(explanation.tokens)
token_score_pairs = zip(tokens, explanation.scores)
influential_words.extend([(token, score) for token, score in token_score_pairs])
influential_words_str = "; ".join([f"{token} ({score:.2f})" for token, score in influential_words])
return influential_words_str
def analyze_text(icelandic_text):
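    """Translate the Icelandic input to English, score sentiment, formality, toxicity and
    politeness, and run a ferret explanation for the aspect with the lowest score."""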
    # Translate the Icelandic input to English
translated_text = translator(icelandic_text, max_length=512)[0]['translation_text']
# Perform initial analysis to get scores
sentiment_result = sentiment_classifier(icelandic_text)[0]
formality_result = formality_classifier(icelandic_text)[0]
    # unitary/toxic-bert is an English model, so score the translation.
    # With top_k=None the pipeline returns one entry per label; use the 'toxic' label's score.
    toxicity_results = detoxify_classifier(translated_text)
    if toxicity_results and isinstance(toxicity_results[0], list):  # some versions nest the output
        toxicity_results = toxicity_results[0]
    toxicity_score = next((r['score'] for r in toxicity_results if r['label'] == 'toxic'), toxicity_results[0]['score'])
politeness_result = politeness_classifier(translated_text)[0]
# Gather scores and labels
scores_labels = {
"Sentiment": (sentiment_result['score'], sentiment_bench),
"Formality": (formality_result['score'], formality_bench),
"Toxicity": (toxicity_mock_score, toxicity_bench), # Use the mock or processed score
"Politeness": (politeness_result['score'], politeness_bench)
}
# Identify the aspect with the lowest score
lowest_aspect = min(scores_labels, key=lambda x: scores_labels[x][0])
# Perform Ferret analysis on the aspect with the lowest score
influential_words = analyze_with_influence(icelandic_text if lowest_aspect in ["Sentiment", "Formality"] else translated_text, scores_labels[lowest_aspect][1])
    analysis_results = (
        f"Translated Text: {translated_text}\n\n"
        f"Lowest Score Aspect: {lowest_aspect}\n"
        f"Influential Words in {lowest_aspect}: {influential_words}"
    )
    return analysis_results
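# Example (hypothetical input):
#   analyze_text("Þetta er frábær dagur!")
# returns the English translation, the lowest-scoring aspect, and the words that most
# influenced that aspect's score.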
demo = gr.Interface(fn=analyze_text,
inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
outputs=gr.Textbox(label="Analysis Results"),
title="Icelandic Text Analysis",
description="This app translates Icelandic text to English and performs analysis with influential words for the aspect with the lowest score.")
if __name__ == "__main__":
demo.launch()