Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -3,29 +3,28 @@ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 import gradio as gr
 from ferret import Benchmark
 
-# Load models and tokenizers
+# Load models and tokenizers
 sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
 sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
-sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
-
-# Load models and tokenizers for formality analysis
 formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
 formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
-formality_bench = Benchmark(formality_model, formality_tokenizer)
-
-# Load models and tokenizers for toxicity analysis
 toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
 toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
-toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
-
-# Load models and tokenizers for politeness analysis
 politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
 politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
+
+# Initialize benchmarks
+sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
+formality_bench = Benchmark(formality_model, formality_tokenizer)
+toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
 politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
 
-# Initialize pipelines for translation and
+# Initialize pipelines for translation and text classification
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
-
+sentiment_classifier = pipeline("text-classification", model="Birkir/electra-base-igc-is-sentiment-analysis")
+formality_classifier = pipeline("text-classification", model="svanhvit/formality-classification-icebert")
+detoxify_classifier = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
+politeness_classifier = pipeline("text-classification", model="Genius1237/xlm-roberta-large-tydip")
 
 def replace_encoding(tokens):
     return [token.replace('臓', ' ')
@@ -60,19 +59,33 @@ def analyze_with_influence(text, bench):
     return influential_words_str
 
 def analyze_text(icelandic_text):
+    # Perform translations
     translated_text = translator(icelandic_text, max_length=512)[0]['translation_text']
+
+    # Perform initial analysis to get scores
+    sentiment_result = sentiment_classifier(icelandic_text)[0]
+    formality_result = formality_classifier(icelandic_text)[0]
+    toxicity_result = detoxify_classifier(translated_text)[0]
+    politeness_result = politeness_classifier(translated_text)[0]
+
+    # Gather scores and labels
+    scores_labels = {
+        "Sentiment": (sentiment_result['score'], sentiment_bench),
+        "Formality": (formality_result['score'], formality_bench),
+        "Toxicity": (toxicity_result['score'], toxicity_bench),
+        "Politeness": (politeness_result['score'], politeness_bench)
+    }
+
+    # Identify the aspect with the lowest score
+    lowest_aspect = min(scores_labels, key=lambda x: scores_labels[x][0])
 
-
-
-    toxicity_analysis = analyze_with_influence(translated_text, toxicity_bench)
-    politeness_analysis = analyze_with_influence(translated_text, politeness_bench)
+    # Perform Ferret analysis on the aspect with the lowest score
+    influential_words = analyze_with_influence(icelandic_text if lowest_aspect in ["Sentiment", "Formality"] else translated_text, scores_labels[lowest_aspect][1])
 
     analysis_results = f"""
     Translated Text: {translated_text}\n\n
-
-
-    Toxicity Analysis with Influential Words: {toxicity_analysis}\n
-    Politeness Analysis with Influential Words: {politeness_analysis}
+    Lowest Score Aspect: {lowest_aspect}\n
+    Influential Words in {lowest_aspect}: {influential_words}
     """
     return analysis_results.strip()
 
@@ -80,7 +93,7 @@ demo = gr.Interface(fn=analyze_text,
                     inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
                     outputs=gr.Textbox(label="Analysis Results"),
                     title="Icelandic Text Analysis",
-                    description="This app translates Icelandic text to English and performs analysis with influential words for
+                    description="This app translates Icelandic text to English and performs analysis with influential words for the aspect with the lowest score.")
 
 if __name__ == "__main__":
     demo.launch()
|