karalif committed on
Commit
b284e87
verified
1 Parent(s): 96d6ff2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -63
app.py CHANGED
@@ -3,95 +3,62 @@ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassifica
3
  import gradio as gr
4
  from ferret import Benchmark
5
 
6
- # Sentiment
7
  sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
8
  sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
9
- sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
10
-
11
- # Formality
12
  formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
13
  formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
14
- formality_bench = Benchmark(formality_model, formality_tokenizer)
15
-
16
- # Toxicity
17
  toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
18
  toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
19
- toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
20
-
21
- # Politeness
22
  politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
23
  politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
 
 
 
 
 
24
  politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
25
 
26
- # Pipelines
27
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
28
- sentiment_classifier = pipeline("text-classification", model="Birkir/electra-base-igc-is-sentiment-analysis")
29
- formality_classifier = pipeline("text-classification", model="svanhvit/formality-classification-icebert")
30
- detoxify_pipeline = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
31
- politeness_classifier = pipeline("text-classification", model="Genius1237/xlm-roberta-large-tydip")
32
 
33
  def replace_encoding(tokens):
34
  return [token.replace('Ġ', ' ') for token in tokens]
35
 
36
- def analyze_text_with_influence(text, bench, label_conversion):
37
- explanations = bench.explain(text, target=1)
38
  influential_words = []
39
  for explanation in explanations:
40
  if explanation.explainer == 'Partition SHAP':
41
  tokens = replace_encoding(explanation.tokens)
42
- token_score_pairs = zip(tokens, explanation.scores)
43
- influential_words.extend([(token, score) for token, score in token_score_pairs])
44
- influential_words_str = "; ".join([f"{token} ({score:.2f})" for token, score in influential_words])
45
- return label_conversion, influential_words_str
46
-
47
- def translate_text(text):
48
- translation = translator(text, max_length=512)
49
- return translation[0]['translation_text']
50
-
51
- def analyze_sentiment(text):
52
- sentiment_result = sentiment_classifier(text)
53
- return sentiment_result[0]['label'].replace("LABEL_", ""), sentiment_result[0]['score']
54
-
55
- def analyze_formality(text):
56
- formality_result = formality_classifier(text)
57
- return formality_result[0]['label'], formality_result[0]['score']
58
-
59
- def analyze_toxicity(text):
60
- toxicity_results = detoxify_pipeline(text)
61
- return toxicity_results[0]
62
-
63
- def analyze_politeness(text):
64
- politeness_result = politeness_classifier(text)
65
- return politeness_result[0]['label'], politeness_result[0]['score']
66
 
67
  def analyze_text(icelandic_text):
68
  translated_text = translate_text(icelandic_text)
69
-
70
  sentiment_label, sentiment_score = analyze_sentiment(icelandic_text)
71
- sentiment_analysis = analyze_text_with_influence(icelandic_text, sentiment_bench, sentiment_label)
72
-
73
  formality_label, formality_score = analyze_formality(icelandic_text)
74
- formality_analysis = analyze_text_with_influence(icelandic_text, formality_bench, formality_label)
75
-
76
  toxicity_results = analyze_toxicity(translated_text)
77
- toxicity_label = '1' if toxicity_results['score'] >= 0.5 else '0'
78
- toxicity_analysis = analyze_text_with_influence(translated_text, toxicity_bench, toxicity_label)
79
-
80
  politeness_label, politeness_score = analyze_politeness(translated_text)
81
- politeness_analysis = analyze_text_with_influence(translated_text, politeness_bench, politeness_label)
82
 
83
- analysis_results = f"""
84
- Translated Text: {translated_text}\n\n
85
- Sentiment: {sentiment_analysis[0]}, Score: {round(sentiment_score, 2)}\n
86
- Influential Words in Sentiment: {sentiment_analysis[1]}\n
87
- Formality: {formality_analysis[0]}, Score: {round(formality_score, 2)}\n
88
- Influential Words in Formality: {formality_analysis[1]}\n
89
- Toxicity: {toxicity_analysis[0]}, Score: {round(toxicity_results['score'], 2)}\n
90
- Influential Words in Toxicity: {toxicity_analysis[1]}\n
91
- Politeness: {politeness_analysis[0]}, Score: {round(politeness_score, 2)}\n
92
- Influential Words in Politeness: {politeness_analysis[1]}
93
- """
94
- return analysis_results.strip()
 
 
 
 
 
95
 
96
  demo = gr.Interface(fn=analyze_text,
97
  inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
@@ -100,4 +67,4 @@ demo = gr.Interface(fn=analyze_text,
100
  description="This app translates Icelandic text to English and performs sentiment, formality, toxicity, and politeness analysis along with influential words analysis.")
101
 
102
  if __name__ == "__main__":
103
- demo.launch()
 
3
  import gradio as gr
4
  from ferret import Benchmark
5
 
6
+ # Load models and tokenizers
7
  sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
8
  sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
 
 
 
9
  formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
10
  formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
 
 
 
11
  toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
12
  toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
 
 
 
13
  politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
14
  politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
15
+
16
+ # Initialize benchmarks
17
+ sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
18
+ formality_bench = Benchmark(formality_model, formality_tokenizer)
19
+ toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
20
  politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
21
 
22
+ # Translation pipeline
23
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
 
 
 
 
24
 
25
  def replace_encoding(tokens):
26
  return [token.replace('Ġ', ' ') for token in tokens]
27
 
28
+ def analyze_with_influence(text, bench):
29
+ explanations = bench.explain(text, target=0) # Assume target=0 for binary classification; adjust if needed
30
  influential_words = []
31
  for explanation in explanations:
32
  if explanation.explainer == 'Partition SHAP':
33
  tokens = replace_encoding(explanation.tokens)
34
+ influential_words.extend(tokens)
35
+ influential_words_str = "; ".join(influential_words)
36
+ return influential_words_str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def analyze_text(icelandic_text):
39
  translated_text = translate_text(icelandic_text)
 
40
  sentiment_label, sentiment_score = analyze_sentiment(icelandic_text)
 
 
41
  formality_label, formality_score = analyze_formality(icelandic_text)
 
 
42
  toxicity_results = analyze_toxicity(translated_text)
 
 
 
43
  politeness_label, politeness_score = analyze_politeness(translated_text)
 
44
 
45
+ sentiment_influential_words = analyze_with_influence(icelandic_text, sentiment_bench)
46
+ formality_influential_words = analyze_with_influence(icelandic_text, formality_bench)
47
+ toxicity_influential_words = analyze_with_influence(translated_text, toxicity_bench)
48
+ politeness_influential_words = analyze_with_influence(translated_text, politeness_bench)
49
+
50
+ analysis_results = (
51
+ f"Translated Text: {translated_text}\n\n"
52
+ f"Sentiment: {sentiment_label}, Score: {round(sentiment_score, 2)}\n"
53
+ f"Influential Words in Sentiment: {sentiment_influential_words}\n"
54
+ f"Formality: {formality_label}, Score: {round(formality_score, 2)}\n"
55
+ f"Influential Words in Formality: {formality_influential_words}\n"
56
+ f"Toxicity: Label: {'1' if toxicity_results['score'] >= 0.5 else '0'}, Score: {round(toxicity_results['score'], 2)}\n"
57
+ f"Influential Words in Toxicity: {toxicity_influential_words}\n"
58
+ f"Politeness: {politeness_label}, Score: {round(politeness_score, 2)}\n"
59
+ f"Influential Words in Politeness: {politeness_influential_words}"
60
+ )
61
+ return analysis_results
62
 
63
  demo = gr.Interface(fn=analyze_text,
64
  inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
 
67
  description="This app translates Icelandic text to English and performs sentiment, formality, toxicity, and politeness analysis along with influential words analysis.")
68
 
69
  if __name__ == "__main__":
70
+ demo.launch()