Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,95 +3,62 @@ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassifica
|
|
3 |
import gradio as gr
|
4 |
from ferret import Benchmark
|
5 |
|
6 |
-
#
|
7 |
sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
|
8 |
sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
|
9 |
-
sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
|
10 |
-
|
11 |
-
# Formality
|
12 |
formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
|
13 |
formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
|
14 |
-
formality_bench = Benchmark(formality_model, formality_tokenizer)
|
15 |
-
|
16 |
-
# Toxicity
|
17 |
toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
|
18 |
toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
|
19 |
-
toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
|
20 |
-
|
21 |
-
# Politeness
|
22 |
politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
|
23 |
politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
|
|
|
|
|
|
|
|
|
|
|
24 |
politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
|
25 |
|
26 |
-
#
|
27 |
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
|
28 |
-
sentiment_classifier = pipeline("text-classification", model="Birkir/electra-base-igc-is-sentiment-analysis")
|
29 |
-
formality_classifier = pipeline("text-classification", model="svanhvit/formality-classification-icebert")
|
30 |
-
detoxify_pipeline = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
|
31 |
-
politeness_classifier = pipeline("text-classification", model="Genius1237/xlm-roberta-large-tydip")
|
32 |
|
33 |
def replace_encoding(tokens):
    """Replace the byte-level BPE space marker in each token with a space.

    Args:
        tokens: Iterable of token strings.

    Returns:
        A new list with one cleaned string per input token, in the same
        order. Tokens without the marker are returned unchanged.
    """
    # "\u0120" is the character "Ġ". The literal that used to be here was a
    # CJK character that is presumably "Ġ" after an encoding round-trip
    # (its UTF-8 bytes 0xC4 0xA0 read through a legacy double-byte code
    # page), so the replacement never matched any real token. The escape
    # keeps the marker safe from further re-encoding.
    return [token.replace("\u0120", " ") for token in tokens]
|
35 |
|
36 |
-
def
|
37 |
-
explanations = bench.explain(text, target=
|
38 |
influential_words = []
|
39 |
for explanation in explanations:
|
40 |
if explanation.explainer == 'Partition SHAP':
|
41 |
tokens = replace_encoding(explanation.tokens)
|
42 |
-
|
43 |
-
|
44 |
-
influential_words_str
|
45 |
-
return label_conversion, influential_words_str
|
46 |
-
|
47 |
-
def translate_text(text):
    """Translate ``text`` using the module-level ``translator`` pipeline.

    The pipeline is invoked with ``max_length=512`` and the
    ``'translation_text'`` field of its first result is returned.
    """
    outputs = translator(text, max_length=512)
    first_output = outputs[0]
    return first_output['translation_text']
|
50 |
-
|
51 |
-
def analyze_sentiment(text):
    """Classify ``text`` with the module-level ``sentiment_classifier``.

    Returns:
        A ``(label, score)`` tuple taken from the first prediction, with
        every ``"LABEL_"`` substring removed from the label.
    """
    top_prediction = sentiment_classifier(text)[0]
    label = top_prediction['label'].replace("LABEL_", "")
    return label, top_prediction['score']
|
54 |
-
|
55 |
-
def analyze_formality(text):
    """Classify ``text`` with the module-level ``formality_classifier``.

    Returns:
        A ``(label, score)`` tuple taken from the first prediction.
    """
    top_prediction = formality_classifier(text)[0]
    return top_prediction['label'], top_prediction['score']
|
58 |
-
|
59 |
-
def analyze_toxicity(text):
    """Run the module-level ``detoxify_pipeline`` on ``text``.

    Returns:
        The first element of the pipeline output, unmodified.
    """
    return detoxify_pipeline(text)[0]
|
62 |
-
|
63 |
-
def analyze_politeness(text):
    """Classify ``text`` with the module-level ``politeness_classifier``.

    Returns:
        A ``(label, score)`` tuple taken from the first prediction.
    """
    top_prediction = politeness_classifier(text)[0]
    return top_prediction['label'], top_prediction['score']
|
66 |
|
67 |
def analyze_text(icelandic_text):
|
68 |
translated_text = translate_text(icelandic_text)
|
69 |
-
|
70 |
sentiment_label, sentiment_score = analyze_sentiment(icelandic_text)
|
71 |
-
sentiment_analysis = analyze_text_with_influence(icelandic_text, sentiment_bench, sentiment_label)
|
72 |
-
|
73 |
formality_label, formality_score = analyze_formality(icelandic_text)
|
74 |
-
formality_analysis = analyze_text_with_influence(icelandic_text, formality_bench, formality_label)
|
75 |
-
|
76 |
toxicity_results = analyze_toxicity(translated_text)
|
77 |
-
toxicity_label = '1' if toxicity_results['score'] >= 0.5 else '0'
|
78 |
-
toxicity_analysis = analyze_text_with_influence(translated_text, toxicity_bench, toxicity_label)
|
79 |
-
|
80 |
politeness_label, politeness_score = analyze_politeness(translated_text)
|
81 |
-
politeness_analysis = analyze_text_with_influence(translated_text, politeness_bench, politeness_label)
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
demo = gr.Interface(fn=analyze_text,
|
97 |
inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
|
@@ -100,4 +67,4 @@ demo = gr.Interface(fn=analyze_text,
|
|
100 |
description="This app translates Icelandic text to English and performs sentiment, formality, toxicity, and politeness analysis along with influential words analysis.")
|
101 |
|
102 |
if __name__ == "__main__":
|
103 |
-
demo.launch()
|
|
|
3 |
import gradio as gr
|
4 |
from ferret import Benchmark
|
5 |
|
6 |
+
# Load models and tokenizers
|
7 |
sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
|
8 |
sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
|
|
|
|
|
|
|
9 |
formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
|
10 |
formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
|
|
|
|
|
|
|
11 |
toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
|
12 |
toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
|
|
|
|
|
|
|
13 |
politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
|
14 |
politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
|
15 |
+
|
16 |
+
# Initialize benchmarks
|
17 |
+
sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
|
18 |
+
formality_bench = Benchmark(formality_model, formality_tokenizer)
|
19 |
+
toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
|
20 |
politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
|
21 |
|
22 |
+
# Translation pipeline
|
23 |
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
|
|
|
|
|
|
|
|
|
24 |
|
25 |
def replace_encoding(tokens):
    """Replace the byte-level BPE space marker in each token with a space.

    Args:
        tokens: Iterable of token strings.

    Returns:
        A new list with one cleaned string per input token, in the same
        order. Tokens without the marker are returned unchanged.
    """
    # "\u0120" is the character "Ġ". The literal that used to be here was a
    # CJK character that is presumably "Ġ" after an encoding round-trip
    # (its UTF-8 bytes 0xC4 0xA0 read through a legacy double-byte code
    # page), so the replacement never matched any real token. The escape
    # keeps the marker safe from further re-encoding.
    return [token.replace("\u0120", " ") for token in tokens]
|
27 |
|
28 |
+
def analyze_with_influence(text, bench, target=0):
    """Collect the tokens of the Partition SHAP explanation(s) for ``text``.

    Args:
        text: Input string handed to ``bench.explain``.
        bench: Object exposing ``explain(text, target=...)``; the callers in
            this file pass ferret ``Benchmark`` instances.
        target: Class index to explain. Defaults to ``0``, the value that
            was previously hard-coded, so existing two-argument calls behave
            as before.

    Returns:
        The tokens of every explanation whose ``explainer`` attribute equals
        ``'Partition SHAP'``, passed through ``replace_encoding`` and joined
        with ``"; "``. An empty string when no explanation matches.
    """
    explanations = bench.explain(text, target=target)
    # NOTE(review): every token of a matching explanation is kept; the
    # attribution scores are not consulted, so the result is not filtered or
    # ranked by influence.
    influential_words = [
        token
        for explanation in explanations
        if explanation.explainer == 'Partition SHAP'
        for token in replace_encoding(explanation.tokens)
    ]
    return "; ".join(influential_words)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
# Text-classification pipelines are created on first use and cached here so
# that importing the module does not build them eagerly.
_CLASSIFIERS = {}


def _get_classifier(task):
    """Return the cached text-classification pipeline for ``task``.

    ``task`` is one of ``"sentiment"``, ``"formality"``, ``"toxicity"`` or
    ``"politeness"``. The pipeline wraps the model/tokenizer objects already
    loaded at module level instead of loading the checkpoints a second time.

    Raises:
        ValueError: If ``task`` is not one of the names above.
    """
    if task not in _CLASSIFIERS:
        options = {}
        if task == "sentiment":
            model, tokenizer = sentiment_model, sentiment_tokenizer
        elif task == "formality":
            model, tokenizer = formality_model, formality_tokenizer
        elif task == "toxicity":
            model, tokenizer = toxicity_model, toxicity_tokenizer
            # Same options the earlier revision used for this model:
            # per-label sigmoid scores, all labels returned.
            options = {"function_to_apply": "sigmoid", "top_k": None}
        elif task == "politeness":
            model, tokenizer = politeness_model, politeness_tokenizer
        else:
            raise ValueError(f"Unknown classifier: {task!r}")
        _CLASSIFIERS[task] = pipeline(
            "text-classification", model=model, tokenizer=tokenizer, **options
        )
    return _CLASSIFIERS[task]


def _first_prediction(result):
    """Unwrap nested lists and return the first prediction of ``result``."""
    # NOTE(review): with ``top_k=None`` the nesting depth of the pipeline
    # output appears to vary between transformers versions; descending until
    # a non-list is reached works for either shape. Confirm against the
    # installed version.
    while isinstance(result, list):
        result = result[0]
    return result


def translate_text(text):
    """Translate ``text`` with the module-level ``translator`` pipeline."""
    translation = translator(text, max_length=512)
    return translation[0]['translation_text']


def analyze_sentiment(text):
    """Return ``(label, score)`` for ``text``, with ``"LABEL_"`` stripped."""
    prediction = _first_prediction(_get_classifier("sentiment")(text))
    return prediction['label'].replace("LABEL_", ""), prediction['score']


def analyze_formality(text):
    """Return ``(label, score)`` of the first formality prediction."""
    prediction = _first_prediction(_get_classifier("formality")(text))
    return prediction['label'], prediction['score']


def analyze_toxicity(text):
    """Return the first toxicity prediction for ``text`` as a dict."""
    return _first_prediction(_get_classifier("toxicity")(text))


def analyze_politeness(text):
    """Return ``(label, score)`` of the first politeness prediction."""
    prediction = _first_prediction(_get_classifier("politeness")(text))
    return prediction['label'], prediction['score']


def analyze_text(icelandic_text):
    """Translate ``icelandic_text`` and build a plain-text analysis report.

    Sentiment and formality are evaluated on the original text; toxicity and
    politeness on its translation. For each of the four, the tokens returned
    by ``analyze_with_influence`` are listed as well.

    The helper functions above are defined together with this function
    because it calls them and no other definition of them remains in the
    module; without them every request failed with ``NameError``.

    Args:
        icelandic_text: The text entered by the user.

    Returns:
        A multi-line string with the translation followed by label, rounded
        score and influential words for each analysis.
    """
    translated_text = translate_text(icelandic_text)

    sentiment_label, sentiment_score = analyze_sentiment(icelandic_text)
    formality_label, formality_score = analyze_formality(icelandic_text)
    toxicity_results = analyze_toxicity(translated_text)
    politeness_label, politeness_score = analyze_politeness(translated_text)

    # Derive the toxicity label and score once instead of repeating the
    # lookup inside the f-string.
    toxicity_score = toxicity_results['score']
    toxicity_label = '1' if toxicity_score >= 0.5 else '0'

    sentiment_influential_words = analyze_with_influence(icelandic_text, sentiment_bench)
    formality_influential_words = analyze_with_influence(icelandic_text, formality_bench)
    toxicity_influential_words = analyze_with_influence(translated_text, toxicity_bench)
    politeness_influential_words = analyze_with_influence(translated_text, politeness_bench)

    analysis_results = (
        f"Translated Text: {translated_text}\n\n"
        f"Sentiment: {sentiment_label}, Score: {round(sentiment_score, 2)}\n"
        f"Influential Words in Sentiment: {sentiment_influential_words}\n"
        f"Formality: {formality_label}, Score: {round(formality_score, 2)}\n"
        f"Influential Words in Formality: {formality_influential_words}\n"
        f"Toxicity: Label: {toxicity_label}, Score: {round(toxicity_score, 2)}\n"
        f"Influential Words in Toxicity: {toxicity_influential_words}\n"
        f"Politeness: {politeness_label}, Score: {round(politeness_score, 2)}\n"
        f"Influential Words in Politeness: {politeness_influential_words}"
    )
    return analysis_results
|
62 |
|
63 |
demo = gr.Interface(fn=analyze_text,
|
64 |
inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
|
|
|
67 |
description="This app translates Icelandic text to English and performs sentiment, formality, toxicity, and politeness analysis along with influential words analysis.")
|
68 |
|
69 |
if __name__ == "__main__":
|
70 |
+
demo.launch()
|