Update app.py
app.py CHANGED
@@ -3,57 +3,32 @@ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 import gradio as gr
 from ferret import Benchmark
 
-# Load models and tokenizers
+# Load models and tokenizers for sentiment analysis
 sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
 sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
+sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
+
+# Load models and tokenizers for formality analysis
 formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
 formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
+formality_bench = Benchmark(formality_model, formality_tokenizer)
+
+# Load models and tokenizers for toxicity analysis
 toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
 toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
+toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
+
+# Load models and tokenizers for politeness analysis
 politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
 politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
-
-# Initialize benchmarks
-sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
-formality_bench = Benchmark(formality_model, formality_tokenizer)
-toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
 politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
 
-#
+# Initialize pipelines for translation and classifiers
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
-
-
-
-
-
-def analyze_sentiment(text):
-    sentiment_result = sentiment_classifier(text)
-    return sentiment_result[0]['label'].replace("LABEL_", ""), sentiment_result[0]['score']
-
-def analyze_formality(text):
-    formality_result = formality_classifier(text)
-    return formality_result[0]['label'], formality_result[0]['score']
-
-def analyze_toxicity(text):
-    toxicity_results = detoxify_pipeline(text)
-    return '1' if toxicity_results[0]['score'] >= 0.5 else '0', toxicity_results[0]['score']
-
-def analyze_politeness(text):
-    politeness_result = politeness_classifier(text)
-    return politeness_result[0]['label'], politeness_result[0]['score']
-
-def replace_encoding(tokens):
-    return [token.replace('Ġ', ' ') for token in tokens]
-
-def analyze_with_influence(text, bench):
-    explanations = bench.explain(text, target=0)  # Assume target=0 for binary classification; adjust if needed
-    influential_words = []
-    for explanation in explanations:
-        if explanation.explainer == 'Partition SHAP':
-            tokens = replace_encoding(explanation.tokens)
-            influential_words.extend(tokens)
-    influential_words_str = "; ".join(influential_words)
-    return influential_words_str
+sentiment_classifier = pipeline("text-classification", model="Birkir/electra-base-igc-is-sentiment-analysis")
+formality_classifier = pipeline("text-classification", model="svanhvit/formality-classification-icebert")
+detoxify_pipeline = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
+politeness_classifier = pipeline("text-classification", model="Genius1237/xlm-roberta-large-tydip")
 
 def replace_encoding(tokens):
     return [token.replace('Ġ', ' ')
@@ -75,12 +50,23 @@ def replace_encoding(tokens):
                  .replace('Ãī', 'É')
                  .replace('Ãļ', 'ý')
                  for token in tokens[1:-1]]
-
+
+def analyze_with_influence(text, bench):
+    explanations = bench.explain(text, target=0)
+    influential_words = []
+    for explanation in explanations:
+        if explanation.explainer == 'Partition SHAP':
+            tokens = replace_encoding(explanation.tokens)
+            token_score_pairs = zip(tokens, explanation.scores)
+            influential_words.extend([(token, score) for token, score in token_score_pairs])
+    influential_words_str = "; ".join([f"{token} ({score:.2f})" for token, score in influential_words])
+    return influential_words_str
+
 def analyze_text(icelandic_text):
-    translated_text =
+    translated_text = translator(icelandic_text, max_length=512)[0]['translation_text']
     sentiment_label, sentiment_score = analyze_sentiment(icelandic_text)
     formality_label, formality_score = analyze_formality(icelandic_text)
-
+    toxicity_label, toxicity_score = analyze_toxicity(translated_text)
     politeness_label, politeness_score = analyze_politeness(translated_text)
 
     sentiment_influential_words = analyze_with_influence(icelandic_text, sentiment_bench)
@@ -88,18 +74,18 @@ def analyze_text(icelandic_text):
     toxicity_influential_words = analyze_with_influence(translated_text, toxicity_bench)
     politeness_influential_words = analyze_with_influence(translated_text, politeness_bench)
 
-    analysis_results =
-
-
-
-
-
-
-
-
-
-
-    return analysis_results
+    analysis_results = f"""
+    Translated Text: {translated_text}\n\n
+    Sentiment: {sentiment_label}, Score: {round(sentiment_score, 2)}\n
+    Influential Words in Sentiment: {sentiment_influential_words}\n
+    Formality: {formality_label}, Score: {round(formality_score, 2)}\n
+    Influential Words in Formality: {formality_influential_words}\n
+    Toxicity: {toxicity_label}, Score: {round(toxicity_score, 2)}\n
+    Influential Words in Toxicity: {toxicity_influential_words}\n
+    Politeness: {politeness_label}, Score: {round(politeness_score, 2)}\n
+    Influential Words in Politeness: {politeness_influential_words}
+    """
+    return analysis_results.strip()
 
 demo = gr.Interface(fn=analyze_text,
                     inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),