karalif committed on
Commit
96d6ff2
verified
1 Parent(s): 6c4f7e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -65
app.py CHANGED
@@ -3,64 +3,59 @@ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassifica
3
  import gradio as gr
4
  from ferret import Benchmark
5
 
 
6
  sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
7
  sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
8
-
9
  sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
12
  sentiment_classifier = pipeline("text-classification", model="Birkir/electra-base-igc-is-sentiment-analysis")
13
  formality_classifier = pipeline("text-classification", model="svanhvit/formality-classification-icebert")
14
  detoxify_pipeline = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
15
  politeness_classifier = pipeline("text-classification", model="Genius1237/xlm-roberta-large-tydip")
16
 
17
def analyze_sentiment_with_influence(icelandic_text):
    """Summarise the sentiment of *icelandic_text* with per-token influence scores.

    The label and score come from ``analyze_sentiment``; the token scores come
    from the 'Partition SHAP' explanation produced by ``sentiment_bench``.

    Returns a two-line string: the sentiment label/score, then the
    "token (score)" pairs separated by "; ".
    """
    raw_label, score = analyze_sentiment(icelandic_text)
    label = raw_label.replace("LABEL_", "")

    # Adjust target as necessary
    explanations = sentiment_bench.explain(icelandic_text, target=1)

    # NOTE(review): replace_encoding may return fewer tokens than there are
    # scores; zip() pairs them positionally and stops at the shorter one --
    # confirm the alignment is what is intended.
    word_scores = [
        pair
        for explanation in explanations
        if explanation.explainer == 'Partition SHAP'
        for pair in zip(replace_encoding(explanation.tokens), explanation.scores)
    ]
    words_summary = "; ".join(f"{token} ({value:.2f})" for token, value in word_scores)

    return (
        f"Sentiment: Label: {label}, Score: {round(score, 2)}\n"
        f"Influential Words: {words_summary}"
    )
38
-
39
def _build_char_to_byte():
    """Return the inverse of the GPT-2 style byte-level BPE table (char -> byte)."""
    # Bytes that byte-level BPE keeps as their own (printable) character.
    printable = set(range(ord("!"), ord("~") + 1))
    printable.update(range(0xA1, 0xAC + 1))
    printable.update(range(0xAE, 0xFF + 1))

    char_to_byte = {chr(value): value for value in printable}
    # Every remaining byte is represented by the next free code point from U+0100 up
    # (for example the space byte 0x20 becomes U+0120).
    offset = 0
    for value in range(256):
        if value not in printable:
            char_to_byte[chr(256 + offset)] = value
            offset += 1
    return char_to_byte


_CHAR_TO_BYTE = _build_char_to_byte()


def _decode_byte_level_token(token):
    """Turn one byte-level BPE token back into readable text.

    Tokens that are not valid byte-level UTF-8 on their own (a multi-byte
    character split across tokens, or a token from another tokenizer family)
    are returned with only the U+0120 space marker replaced.
    """
    try:
        return bytes(_CHAR_TO_BYTE[char] for char in token).decode("utf-8")
    except (KeyError, UnicodeDecodeError):
        return token.replace("\u0120", " ")


def replace_encoding(tokens):
    """Return readable text for *tokens*, skipping the first and last token.

    The first and last entries are dropped (presumably the tokenizer's special
    start/end tokens -- verify against the caller). Each remaining token is
    decoded from its byte-level BPE spelling, which restores the leading space
    marker and every accented character rather than a hand-maintained subset.
    """
    return [_decode_byte_level_token(token) for token in tokens[1:-1]]
59
 
60
  def translate_text(text):
61
  translation = translator(text, max_length=512)
62
  return translation[0]['translation_text']
63
 
 
 
 
 
 
 
 
 
64
  def analyze_toxicity(text):
65
  toxicity_results = detoxify_pipeline(text)
66
  return toxicity_results[0]
@@ -69,45 +64,40 @@ def analyze_politeness(text):
69
  politeness_result = politeness_classifier(text)
70
  return politeness_result[0]['label'], politeness_result[0]['score']
71
 
72
def analyze_formality(text):
    """Classify *text* with ``formality_classifier``.

    Returns a ``(label, score)`` tuple taken from the first result.
    """
    top_result = formality_classifier(text)[0]
    return top_result['label'], top_result['score']
77
 
78
def analyze_sentiment(text):
    """Classify *text* with ``sentiment_classifier``.

    Returns a ``(label, score)`` tuple taken from the first result.
    """
    top_result = sentiment_classifier(text)[0]
    return top_result['label'], top_result['score']
83
 
84
def analyze_text(icelandic_text):
    """Build the full analysis report for *icelandic_text*.

    Formality and sentiment are computed on the original text; toxicity and
    politeness are computed on its translation. Toxicity and politeness are
    reported as '1'/'0' labels next to their scores.

    Returns the report as a single multi-line string.
    """
    formality_label, formality_score = analyze_formality(icelandic_text)
    translated_text = translate_text(icelandic_text)

    toxicity = analyze_toxicity(translated_text)
    if isinstance(toxicity, list):
        # A list result is reduced to its first entry.
        toxicity = toxicity[0]
    toxicity_score = toxicity['score']
    if toxicity_score >= 0.5:
        toxicity_label = '1'
    else:
        toxicity_label = '0'

    politeness_name, politeness_score = analyze_politeness(translated_text)
    if politeness_name.lower() == 'polite':
        politeness_label = '1'
    else:
        politeness_label = '0'

    sentiment_section = analyze_sentiment_with_influence(icelandic_text)

    report_parts = [
        f"Translated Text: {translated_text}\n\n",
        f"{sentiment_section}\n",
        f"Formality: Label: {formality_label}, Score: {round(formality_score, 2)}\n",
        f"Toxicity: Label: {toxicity_label}, Score: {round(toxicity_score, 2)}\n",
        f"Politeness: Label: {politeness_label}, Score: {round(politeness_score, 2)}",
    ]
    return "".join(report_parts)
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  demo = gr.Interface(fn=analyze_text,
107
  inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
108
  outputs=gr.Textbox(label="Analysis Results"),
109
  title="Icelandic Text Analysis",
110
- description="This app translates Icelandic text to English and performs sentiment, formality, toxicity, and politeness analysis.")
111
 
112
  if __name__ == "__main__":
113
  demo.launch()
 
3
  import gradio as gr
4
  from ferret import Benchmark
5
 
6
# Each classifier checkpoint is loaded once; the same model/tokenizer objects
# back both the ferret benchmark (explanations) and the classification
# pipeline, so the weights are not held in memory twice.

# Sentiment
sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)

# Formality
formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
formality_bench = Benchmark(formality_model, formality_tokenizer)

# Toxicity
toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)

# Politeness
politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
politeness_bench = Benchmark(politeness_model, politeness_tokenizer)

# Pipelines
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
sentiment_classifier = pipeline("text-classification", model=sentiment_model, tokenizer=sentiment_tokenizer)
formality_classifier = pipeline("text-classification", model=formality_model, tokenizer=formality_tokenizer)
# The tokenizer name is kept exactly as before; only the model weights are shared.
# top_k=None asks for every label's score; sigmoid is applied to the logits.
detoxify_pipeline = pipeline('text-classification', model=toxicity_model, tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
politeness_classifier = pipeline("text-classification", model=politeness_model, tokenizer=politeness_tokenizer)
32
 
33
def _build_char_to_byte():
    """Return the inverse of the GPT-2 style byte-level BPE table (char -> byte)."""
    # Bytes that byte-level BPE keeps as their own (printable) character.
    printable = set(range(ord("!"), ord("~") + 1))
    printable.update(range(0xA1, 0xAC + 1))
    printable.update(range(0xAE, 0xFF + 1))

    char_to_byte = {chr(value): value for value in printable}
    # Every remaining byte is represented by the next free code point from U+0100 up
    # (for example the space byte 0x20 becomes U+0120).
    offset = 0
    for value in range(256):
        if value not in printable:
            char_to_byte[chr(256 + offset)] = value
            offset += 1
    return char_to_byte


_CHAR_TO_BYTE = _build_char_to_byte()


def _decode_byte_level_token(token):
    """Turn one byte-level BPE token back into readable text.

    Tokens that are not valid byte-level UTF-8 on their own (a multi-byte
    character split across tokens, or a token from a WordPiece/SentencePiece
    tokenizer) are returned with only the U+0120 space marker replaced.
    """
    try:
        return bytes(_CHAR_TO_BYTE[char] for char in token).decode("utf-8")
    except (KeyError, UnicodeDecodeError):
        return token.replace("\u0120", " ")


def replace_encoding(tokens):
    """Return a readable version of every token in *tokens*.

    The U+0120 marker that byte-level BPE uses for a leading space becomes a
    real space, and accented characters spelled as byte-level symbols are
    decoded. The marker is written as an escape so it cannot be corrupted by a
    file re-encoding. The output has one entry per input token, in order.
    """
    return [_decode_byte_level_token(token) for token in tokens]
35
+
36
def analyze_text_with_influence(text, bench, label_conversion):
    """Collect per-token influence scores for *text* from *bench*.

    ``bench.explain`` is called with ``target=1``; only explanations whose
    ``explainer`` is 'Partition SHAP' contribute tokens.

    Returns ``(label_conversion, summary)``: *label_conversion* is handed back
    unchanged, and *summary* lists "token (score)" pairs separated by "; ".
    """
    scored_tokens = []
    for item in bench.explain(text, target=1):
        if item.explainer != 'Partition SHAP':
            continue
        scored_tokens.extend(zip(replace_encoding(item.tokens), item.scores))

    summary = "; ".join(f"{token} ({value:.2f})" for token, value in scored_tokens)
    return label_conversion, summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  def translate_text(text):
48
  translation = translator(text, max_length=512)
49
  return translation[0]['translation_text']
50
 
51
def analyze_sentiment(text):
    """Classify *text* with ``sentiment_classifier``.

    Returns ``(label, score)`` from the first result, with the "LABEL_"
    prefix removed from the label.
    """
    top_result = sentiment_classifier(text)[0]
    label = top_result['label'].replace("LABEL_", "")
    return label, top_result['score']
54
+
55
def analyze_formality(text):
    """Classify *text* with ``formality_classifier``.

    Returns ``(label, score)`` from the first result.
    """
    top_result = formality_classifier(text)[0]
    return top_result['label'], top_result['score']
58
+
59
  def analyze_toxicity(text):
60
  toxicity_results = detoxify_pipeline(text)
61
  return toxicity_results[0]
 
64
  politeness_result = politeness_classifier(text)
65
  return politeness_result[0]['label'], politeness_result[0]['score']
66
 
67
def analyze_text(icelandic_text):
    """Build the full analysis report for *icelandic_text*.

    Sentiment and formality are computed on the original text; toxicity and
    politeness are computed on its translation. Each analysis is paired with
    the influential-token summary from ``analyze_text_with_influence``.

    Returns the report as one string: the translated text, a blank line, then
    one line per result.
    """
    translated_text = translate_text(icelandic_text)

    sentiment_label, sentiment_score = analyze_sentiment(icelandic_text)
    sentiment_analysis = analyze_text_with_influence(icelandic_text, sentiment_bench, sentiment_label)

    formality_label, formality_score = analyze_formality(icelandic_text)
    formality_analysis = analyze_text_with_influence(icelandic_text, formality_bench, formality_label)

    toxicity_results = analyze_toxicity(translated_text)
    # The toxicity pipeline is configured with top_k=None, so the value handed
    # back here can be a list of per-label dicts rather than a single dict.
    # Indexing a list with 'score' would raise TypeError, so reduce it to its
    # first entry (presumably the highest-scoring label -- verify).
    if isinstance(toxicity_results, list):
        toxicity_results = toxicity_results[0]
    toxicity_score = toxicity_results['score']
    toxicity_label = '1' if toxicity_score >= 0.5 else '0'
    toxicity_analysis = analyze_text_with_influence(translated_text, toxicity_bench, toxicity_label)

    politeness_label, politeness_score = analyze_politeness(translated_text)
    politeness_analysis = analyze_text_with_influence(translated_text, politeness_bench, politeness_label)

    # Joined with "\n" rather than written as a triple-quoted block: the block
    # form put an escaped newline and a literal newline on every line, which
    # doubled each line break and carried source indentation into the output.
    report_lines = [
        f"Translated Text: {translated_text}",
        "",
        f"Sentiment: {sentiment_analysis[0]}, Score: {round(sentiment_score, 2)}",
        f"Influential Words in Sentiment: {sentiment_analysis[1]}",
        f"Formality: {formality_analysis[0]}, Score: {round(formality_score, 2)}",
        f"Influential Words in Formality: {formality_analysis[1]}",
        f"Toxicity: {toxicity_analysis[0]}, Score: {round(toxicity_score, 2)}",
        f"Influential Words in Toxicity: {toxicity_analysis[1]}",
        f"Politeness: {politeness_analysis[0]}, Score: {round(politeness_score, 2)}",
        f"Influential Words in Politeness: {politeness_analysis[1]}",
    ]
    return "\n".join(report_lines).strip()
95
 
96
  demo = gr.Interface(fn=analyze_text,
97
  inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
98
  outputs=gr.Textbox(label="Analysis Results"),
99
  title="Icelandic Text Analysis",
100
+ description="This app translates Icelandic text to English and performs sentiment, formality, toxicity, and politeness analysis along with influential words analysis.")
101
 
102
  if __name__ == "__main__":
103
  demo.launch()