karalif committed on
Commit
cd8fa87
verified
1 Parent(s): 546ed8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -55
app.py CHANGED
@@ -3,57 +3,32 @@ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassifica
3
  import gradio as gr
4
  from ferret import Benchmark
5
 
6
- # Load models and tokenizers
7
  sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
8
  sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
 
 
 
9
  formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
10
  formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
 
 
 
11
  toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
12
  toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
 
 
 
13
  politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
14
  politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
15
-
16
- # Initialize benchmarks
17
- sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
18
- formality_bench = Benchmark(formality_model, formality_tokenizer)
19
- toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
20
  politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
21
 
22
- # Translation pipeline
23
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
24
-
25
- def translate_text(text):
26
- translation = translator(text, max_length=512)
27
- return translation[0]['translation_text']
28
-
29
- def analyze_sentiment(text):
30
- sentiment_result = sentiment_classifier(text)
31
- return sentiment_result[0]['label'].replace("LABEL_", ""), sentiment_result[0]['score']
32
-
33
- def analyze_formality(text):
34
- formality_result = formality_classifier(text)
35
- return formality_result[0]['label'], formality_result[0]['score']
36
-
37
- def analyze_toxicity(text):
38
- toxicity_results = detoxify_pipeline(text)
39
- return '1' if toxicity_results[0]['score'] >= 0.5 else '0', toxicity_results[0]['score']
40
-
41
- def analyze_politeness(text):
42
- politeness_result = politeness_classifier(text)
43
- return politeness_result[0]['label'], politeness_result[0]['score']
44
-
45
- def replace_encoding(tokens):
46
- return [token.replace('臓', ' ') for token in tokens]
47
-
48
- def analyze_with_influence(text, bench):
49
- explanations = bench.explain(text, target=0) # Assume target=0 for binary classification; adjust if needed
50
- influential_words = []
51
- for explanation in explanations:
52
- if explanation.explainer == 'Partition SHAP':
53
- tokens = replace_encoding(explanation.tokens)
54
- influential_words.extend(tokens)
55
- influential_words_str = "; ".join(influential_words)
56
- return influential_words_str
57
 
58
  def replace_encoding(tokens):
59
  return [token.replace('臓', ' ')
@@ -75,12 +50,23 @@ def replace_encoding(tokens):
75
  .replace('脙墨', '脡')
76
  .replace('脙募', '媒')
77
  for token in tokens[1:-1]]
78
-
 
 
 
 
 
 
 
 
 
 
 
79
  def analyze_text(icelandic_text):
80
- translated_text = translate_text(icelandic_text)
81
  sentiment_label, sentiment_score = analyze_sentiment(icelandic_text)
82
  formality_label, formality_score = analyze_formality(icelandic_text)
83
- toxicity_results = analyze_toxicity(translated_text)
84
  politeness_label, politeness_score = analyze_politeness(translated_text)
85
 
86
  sentiment_influential_words = analyze_with_influence(icelandic_text, sentiment_bench)
@@ -88,18 +74,18 @@ def analyze_text(icelandic_text):
88
  toxicity_influential_words = analyze_with_influence(translated_text, toxicity_bench)
89
  politeness_influential_words = analyze_with_influence(translated_text, politeness_bench)
90
 
91
- analysis_results = (
92
- f"Translated Text: {translated_text}\n\n"
93
- f"Sentiment: {sentiment_label}, Score: {round(sentiment_score, 2)}\n"
94
- f"Influential Words in Sentiment: {sentiment_influential_words}\n"
95
- f"Formality: {formality_label}, Score: {round(formality_score, 2)}\n"
96
- f"Influential Words in Formality: {formality_influential_words}\n"
97
- f"Toxicity: Label: {'1' if toxicity_results['score'] >= 0.5 else '0'}, Score: {round(toxicity_results['score'], 2)}\n"
98
- f"Influential Words in Toxicity: {toxicity_influential_words}\n"
99
- f"Politeness: {politeness_label}, Score: {round(politeness_score, 2)}\n"
100
- f"Influential Words in Politeness: {politeness_influential_words}"
101
- )
102
- return analysis_results
103
 
104
  demo = gr.Interface(fn=analyze_text,
105
  inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
 
3
  import gradio as gr
4
  from ferret import Benchmark
5
 
6
+ # Load models and tokenizers for sentiment analysis
7
  sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
8
  sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
9
+ sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
10
+
11
+ # Load models and tokenizers for formality analysis
12
  formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
13
  formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
14
+ formality_bench = Benchmark(formality_model, formality_tokenizer)
15
+
16
+ # Load models and tokenizers for toxicity analysis
17
  toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
18
  toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
19
+ toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
20
+
21
+ # Load models and tokenizers for politeness analysis
22
  politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
23
  politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
 
 
 
 
 
24
  politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
25
 
26
+ # Initialize pipelines for translation and classifiers
27
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
28
+ sentiment_classifier = pipeline("text-classification", model="Birkir/electra-base-igc-is-sentiment-analysis")
29
+ formality_classifier = pipeline("text-classification", model="svanhvit/formality-classification-icebert")
30
+ detoxify_pipeline = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
31
+ politeness_classifier = pipeline("text-classification", model="Genius1237/xlm-roberta-large-tydip")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  def replace_encoding(tokens):
34
  return [token.replace('臓', ' ')
 
50
  .replace('脙墨', '脡')
51
  .replace('脙募', '媒')
52
  for token in tokens[1:-1]]
53
+
54
+ def analyze_with_influence(text, bench):
55
+ explanations = bench.explain(text, target=0)
56
+ influential_words = []
57
+ for explanation in explanations:
58
+ if explanation.explainer == 'Partition SHAP':
59
+ tokens = replace_encoding(explanation.tokens)
60
+ token_score_pairs = zip(tokens, explanation.scores)
61
+ influential_words.extend([(token, score) for token, score in token_score_pairs])
62
+ influential_words_str = "; ".join([f"{token} ({score:.2f})" for token, score in influential_words])
63
+ return influential_words_str
64
+
65
  def analyze_text(icelandic_text):
66
+ translated_text = translator(icelandic_text, max_length=512)[0]['translation_text']
67
  sentiment_label, sentiment_score = analyze_sentiment(icelandic_text)
68
  formality_label, formality_score = analyze_formality(icelandic_text)
69
+ toxicity_label, toxicity_score = analyze_toxicity(translated_text)
70
  politeness_label, politeness_score = analyze_politeness(translated_text)
71
 
72
  sentiment_influential_words = analyze_with_influence(icelandic_text, sentiment_bench)
 
74
  toxicity_influential_words = analyze_with_influence(translated_text, toxicity_bench)
75
  politeness_influential_words = analyze_with_influence(translated_text, politeness_bench)
76
 
77
+ analysis_results = f"""
78
+ Translated Text: {translated_text}\n\n
79
+ Sentiment: {sentiment_label}, Score: {round(sentiment_score, 2)}\n
80
+ Influential Words in Sentiment: {sentiment_influential_words}\n
81
+ Formality: {formality_label}, Score: {round(formality_score, 2)}\n
82
+ Influential Words in Formality: {formality_influential_words}\n
83
+ Toxicity: {toxicity_label}, Score: {round(toxicity_score, 2)}\n
84
+ Influential Words in Toxicity: {toxicity_influential_words}\n
85
+ Politeness: {politeness_label}, Score: {round(politeness_score, 2)}\n
86
+ Influential Words in Politeness: {politeness_influential_words}
87
+ """
88
+ return analysis_results.strip()
89
 
90
  demo = gr.Interface(fn=analyze_text,
91
  inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),