Update app.py
app.py
CHANGED
Before (lines removed in this commit are prefixed with "-"):

@@ -3,64 +3,59 @@ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 import gradio as gr
 from ferret import Benchmark
 
 sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
 sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
-
 sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
 
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
 sentiment_classifier = pipeline("text-classification", model="Birkir/electra-base-igc-is-sentiment-analysis")
 formality_classifier = pipeline("text-classification", model="svanhvit/formality-classification-icebert")
 detoxify_pipeline = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
 politeness_classifier = pipeline("text-classification", model="Genius1237/xlm-roberta-large-tydip")
 
-def analyze_sentiment_with_influence(icelandic_text):
-
-
-
-
-    explanations_sentiment = sentiment_bench.explain(icelandic_text, target=1)  # Adjust target as necessary
-
     influential_words = []
-    for explanation in explanations_sentiment:
         if explanation.explainer == 'Partition SHAP':
             tokens = replace_encoding(explanation.tokens)
             token_score_pairs = zip(tokens, explanation.scores)
             influential_words.extend([(token, score) for token, score in token_score_pairs])
-
     influential_words_str = "; ".join([f"{token} ({score:.2f})" for token, score in influential_words])
-
-    analysis_results = (
-        f"Sentiment: Label: {sentiment_label}, Score: {round(sentiment_score, 2)}\n"
-        f"Influential Words: {influential_words_str}"
-    )
-    return analysis_results
-
-def replace_encoding(tokens):
-    return [token.replace('Ġ', ' ')
-                 .replace('Ã°', 'ð')
-                 .replace('Ã©', 'é')
-                 .replace('Ã¦', 'æ')
-                 .replace('Ã½', 'ý')
-                 .replace('Ã¡', 'á')
-                 .replace('Ãº', 'ú')
-                 .replace('ÃŃ', 'í')
-                 .replace('Ã¶', 'ö')
-                 .replace('Ã¾', 'þ')
-                 .replace('Ãģ', 'Á')
-                 .replace('Ãļ', 'Ú')
-                 .replace('Ãĵ', 'Ó')
-                 .replace('ÃĨ', 'Æ')
-                 .replace('ÃĲ', 'Ð')
-                 .replace('Ãĸ', 'Ö')
-                 .replace('Ãī', 'É')
-                 .replace('ÃĿ', 'ý')
-            for token in tokens[1:-1]]
 
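The chained replacements being removed here undo the tokenizer's byte-level BPE rendering: 'Ġ' marks a word-initial space, and pairs such as 'Ã°' are the raw UTF-8 bytes of 'ð' surfaced as separate symbols. Assuming a standard Hugging Face tokenizer API, the built-in detokenizer does the same job in one call; a minimal sketch, not part of this commit, with decode_tokens being a purely illustrative name:

    # Sketch: convert_tokens_to_string reverses the byte-level encoding,
    # including the 'Ġ' -> ' ' space marker, in a single call.
    def decode_tokens(tokens):
        return sentiment_tokenizer.convert_tokens_to_string(tokens)

Note that it returns one string rather than a token-by-token list, so it suits displaying whole spans more than attaching per-token scores.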
 def translate_text(text):
     translation = translator(text, max_length=512)
     return translation[0]['translation_text']
 
 def analyze_toxicity(text):
     toxicity_results = detoxify_pipeline(text)
     return toxicity_results[0]
@@ -69,45 +64,40 @@ def analyze_politeness(text):
     politeness_result = politeness_classifier(text)
     return politeness_result[0]['label'], politeness_result[0]['score']
 
-def analyze_formality(text):
-
-    formality_label = formality_result[0]['label']
-    formality_score = formality_result[0]['score']
-    return formality_label, formality_score
 
-
-
-    sentiment_label = sentiment_result[0]['label']
-    sentiment_score = sentiment_result[0]['score']
-    return sentiment_label, sentiment_score
 
-def analyze_text(icelandic_text):
     formality_label, formality_score = analyze_formality(icelandic_text)
-
     toxicity_results = analyze_toxicity(translated_text)
-    if isinstance(toxicity_results, list):
-        toxicity_results = toxicity_results[0]
     toxicity_label = '1' if toxicity_results['score'] >= 0.5 else '0'
-
-    politeness_label = '1' if politeness_label.lower() == 'polite' else '0'
-
-    sentiment_analysis_with_influence = analyze_sentiment_with_influence(icelandic_text)
-
-    analysis_results = (
-        f"Translated Text: {translated_text}\n\n"
-        f"{sentiment_analysis_with_influence}\n"
-        f"Formality: Label: {formality_label}, Score: {round(formality_score, 2)}\n"
-        f"Toxicity: Label: {toxicity_label}, Score: {round(toxicity_results['score'], 2)}\n"
-        f"Politeness: Label: {politeness_label}, Score: {round(politeness_score, 2)}"
-    )
-    return analysis_results
 
 
 demo = gr.Interface(fn=analyze_text,
                     inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
                     outputs=gr.Textbox(label="Analysis Results"),
                     title="Icelandic Text Analysis",
-                    description="This app translates Icelandic text to English and performs sentiment, formality, toxicity, and politeness analysis.")
 
 if __name__ == "__main__":
     demo.launch()
After (lines added in this commit are prefixed with "+"):

 import gradio as gr
 from ferret import Benchmark
 
+# Sentiment
 sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
 sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
 sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
 
+# Formality
+formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
+formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
+formality_bench = Benchmark(formality_model, formality_tokenizer)
+
+# Toxicity
+toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
+toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
+toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
+
+# Politeness
+politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
+politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
+politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
+
+# Pipelines
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
 sentiment_classifier = pipeline("text-classification", model="Birkir/electra-base-igc-is-sentiment-analysis")
 formality_classifier = pipeline("text-classification", model="svanhvit/formality-classification-icebert")
 detoxify_pipeline = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
 politeness_classifier = pipeline("text-classification", model="Genius1237/xlm-roberta-large-tydip")
 
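For orientation, this is roughly how the Benchmark wrappers set up above get queried; the attribute names mirror the ones used later in this file, but treat the exact ferret return type, and the sample sentence, as assumptions rather than documented behaviour:

    # Sketch: ask ferret to explain class index 1 for one hypothetical input.
    sample = "Þetta er frábært!"  # hypothetical Icelandic sentence
    explanations = sentiment_bench.explain(sample, target=1)
    for explanation in explanations:
        if explanation.explainer == 'Partition SHAP':
            print(list(zip(explanation.tokens, explanation.scores)))  # token-level attributions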
+def replace_encoding(tokens):
+    return [token.replace('Ġ', ' ') for token in tokens]
+
+def analyze_text_with_influence(text, bench, label_conversion):
+    explanations = bench.explain(text, target=1)
     influential_words = []
+    for explanation in explanations:
         if explanation.explainer == 'Partition SHAP':
             tokens = replace_encoding(explanation.tokens)
             token_score_pairs = zip(tokens, explanation.scores)
             influential_words.extend([(token, score) for token, score in token_score_pairs])
     influential_words_str = "; ".join([f"{token} ({score:.2f})" for token, score in influential_words])
+    return label_conversion, influential_words_str
 
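A quick usage sketch for the new helper: each call returns a (label, influential-words string) pair for whichever Benchmark it is given. The input sentence and the "1" label below are hypothetical:

    # Sketch: explain the sentiment model's prediction for one sentence.
    label, influential = analyze_text_with_influence("Þetta er frábært!", sentiment_bench, "1")
    print(label)        # the label_conversion argument is returned unchanged
    print(influential)  # "; "-separated "token (score)" entries from Partition SHAP

Since the label is passed straight through, the function's real work is producing the influential-words string for the given Benchmark.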
 def translate_text(text):
     translation = translator(text, max_length=512)
     return translation[0]['translation_text']
 
+def analyze_sentiment(text):
+    sentiment_result = sentiment_classifier(text)
+    return sentiment_result[0]['label'].replace("LABEL_", ""), sentiment_result[0]['score']
+
+def analyze_formality(text):
+    formality_result = formality_classifier(text)
+    return formality_result[0]['label'], formality_result[0]['score']
+
 def analyze_toxicity(text):
     toxicity_results = detoxify_pipeline(text)
     return toxicity_results[0]
 
 def analyze_politeness(text):
     politeness_result = politeness_classifier(text)
     return politeness_result[0]['label'], politeness_result[0]['score']
 
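These helpers all lean on the standard text-classification pipeline output, a list of {'label': ..., 'score': ...} dicts per input; with top_k=None (the toxicity pipeline) every class score comes back, and analyze_toxicity keeps only the first entry. A small sketch of the shapes assumed, with hypothetical inputs:

    # Sketch of the shapes the helper functions above rely on.
    result = sentiment_classifier("Þetta er frábært!")       # hypothetical input
    label, score = result[0]['label'], result[0]['score']    # single top prediction
    tox = detoxify_pipeline("some translated text")[0]        # first of all toxic-bert labels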
+def analyze_text(icelandic_text):
+    translated_text = translate_text(icelandic_text)
 
+    sentiment_label, sentiment_score = analyze_sentiment(icelandic_text)
+    sentiment_analysis = analyze_text_with_influence(icelandic_text, sentiment_bench, sentiment_label)
 
     formality_label, formality_score = analyze_formality(icelandic_text)
+    formality_analysis = analyze_text_with_influence(icelandic_text, formality_bench, formality_label)
+
     toxicity_results = analyze_toxicity(translated_text)
     toxicity_label = '1' if toxicity_results['score'] >= 0.5 else '0'
+    toxicity_analysis = analyze_text_with_influence(translated_text, toxicity_bench, toxicity_label)
 
+    politeness_label, politeness_score = analyze_politeness(translated_text)
+    politeness_analysis = analyze_text_with_influence(translated_text, politeness_bench, politeness_label)
+
+    analysis_results = f"""
+    Translated Text: {translated_text}\n\n
+    Sentiment: {sentiment_analysis[0]}, Score: {round(sentiment_score, 2)}\n
+    Influential Words in Sentiment: {sentiment_analysis[1]}\n
+    Formality: {formality_analysis[0]}, Score: {round(formality_score, 2)}\n
+    Influential Words in Formality: {formality_analysis[1]}\n
+    Toxicity: {toxicity_analysis[0]}, Score: {round(toxicity_results['score'], 2)}\n
+    Influential Words in Toxicity: {toxicity_analysis[1]}\n
+    Politeness: {politeness_analysis[0]}, Score: {round(politeness_score, 2)}\n
+    Influential Words in Politeness: {politeness_analysis[1]}
+    """
+    return analysis_results.strip()
 
 demo = gr.Interface(fn=analyze_text,
                     inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
                     outputs=gr.Textbox(label="Analysis Results"),
                     title="Icelandic Text Analysis",
+                    description="This app translates Icelandic text to English and performs sentiment, formality, toxicity, and politeness analysis along with influential words analysis.")
 
 if __name__ == "__main__":
     demo.launch()
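To smoke-test the updated app locally before relying on the Gradio UI, something along these lines works; the sample sentence is hypothetical, and share=True would instead create a temporary public link:

    # Sketch: run one end-to-end analysis, then serve the interface.
    print(analyze_text("Þetta er frábært!"))
    demo.launch(share=False)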