karalif committed
Commit 3e1d85e (verified)
1 Parent(s): 089af2e

Update app.py

Files changed (1)
  1. app.py +34 -21
app.py CHANGED
@@ -3,29 +3,28 @@ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 import gradio as gr
 from ferret import Benchmark
 
-# Load models and tokenizers for sentiment analysis
+# Load models and tokenizers
 sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
 sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
-sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
-
-# Load models and tokenizers for formality analysis
 formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
 formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
-formality_bench = Benchmark(formality_model, formality_tokenizer)
-
-# Load models and tokenizers for toxicity analysis
 toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
 toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
-toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
-
-# Load models and tokenizers for politeness analysis
 politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
 politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
+
+# Initialize benchmarks
+sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
+formality_bench = Benchmark(formality_model, formality_tokenizer)
+toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
 politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
 
-# Initialize pipelines for translation and classifiers
+# Initialize pipelines for translation and text classification
 translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
-detoxify_pipeline = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
+sentiment_classifier = pipeline("text-classification", model="Birkir/electra-base-igc-is-sentiment-analysis")
+formality_classifier = pipeline("text-classification", model="svanhvit/formality-classification-icebert")
+detoxify_classifier = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
+politeness_classifier = pipeline("text-classification", model="Genius1237/xlm-roberta-large-tydip")
 
 def replace_encoding(tokens):
     return [token.replace('Ġ', ' ')
@@ -60,19 +59,33 @@ def analyze_with_influence(text, bench):
     return influential_words_str
 
 def analyze_text(icelandic_text):
+    # Perform translations
     translated_text = translator(icelandic_text, max_length=512)[0]['translation_text']
+
+    # Perform initial analysis to get scores
+    sentiment_result = sentiment_classifier(icelandic_text)[0]
+    formality_result = formality_classifier(icelandic_text)[0]
+    toxicity_result = detoxify_classifier(translated_text)[0]
+    politeness_result = politeness_classifier(translated_text)[0]
+
+    # Gather scores and labels
+    scores_labels = {
+        "Sentiment": (sentiment_result['score'], sentiment_bench),
+        "Formality": (formality_result['score'], formality_bench),
+        "Toxicity": (toxicity_result['score'], toxicity_bench),
+        "Politeness": (politeness_result['score'], politeness_bench)
+    }
+
+    # Identify the aspect with the lowest score
+    lowest_aspect = min(scores_labels, key=lambda x: scores_labels[x][0])
 
-    sentiment_analysis = analyze_with_influence(icelandic_text, sentiment_bench)
-    formality_analysis = analyze_with_influence(icelandic_text, formality_bench)
-    toxicity_analysis = analyze_with_influence(translated_text, toxicity_bench)
-    politeness_analysis = analyze_with_influence(translated_text, politeness_bench)
+    # Perform Ferret analysis on the aspect with the lowest score
+    influential_words = analyze_with_influence(icelandic_text if lowest_aspect in ["Sentiment", "Formality"] else translated_text, scores_labels[lowest_aspect][1])
 
     analysis_results = f"""
     Translated Text: {translated_text}\n\n
-    Sentiment Analysis with Influential Words: {sentiment_analysis}\n
-    Formality Analysis with Influential Words: {formality_analysis}\n
-    Toxicity Analysis with Influential Words: {toxicity_analysis}\n
-    Politeness Analysis with Influential Words: {politeness_analysis}
+    Lowest Score Aspect: {lowest_aspect}\n
+    Influential Words in {lowest_aspect}: {influential_words}
     """
     return analysis_results.strip()
 
@@ -80,7 +93,7 @@ demo = gr.Interface(fn=analyze_text,
                     inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
                     outputs=gr.Textbox(label="Analysis Results"),
                     title="Icelandic Text Analysis",
-                    description="This app translates Icelandic text to English and performs analysis with influential words for sentiment, formality, toxicity, and politeness.")
+                    description="This app translates Icelandic text to English and performs analysis with influential words for the aspect with the lowest score.")
 
 if __name__ == "__main__":
     demo.launch()
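
Note: the hunk above keeps calling analyze_with_influence(text, bench) and replace_encoding(tokens), whose bodies sit in the unchanged lines between the two hunks and therefore do not appear in this diff. For orientation only, a minimal sketch of what such a helper can look like on top of ferret's Benchmark.explain API follows; the target class, the top-five cut-off, and the output formatting are illustrative assumptions, not the committed implementation.

# Hypothetical sketch only -- not the committed helper, whose body is outside this diff.
# Assumes ferret's Benchmark.explain(), which runs the configured explainers and
# returns one Explanation per explainer, each carrying .tokens and per-token .scores.
def analyze_with_influence(text, bench):
    explanation = bench.explain(text, target=1)[0]   # attributions from the first explainer
    tokens = replace_encoding(explanation.tokens)    # strip byte-level BPE markers such as 'Ġ'
    ranked = sorted(zip(tokens, explanation.scores),
                    key=lambda pair: abs(pair[1]), reverse=True)
    # keep the five most influential non-empty tokens as a comma-separated string
    return ", ".join(tok.strip() for tok, _ in ranked[:5] if tok.strip())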
 
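
Note: the functional change in this commit is the selection step. Instead of running Ferret's comparatively expensive explainers on all four aspects, analyze_text now asks the four plain text-classification pipelines for a score first and only explains the aspect whose score is lowest. A small standalone illustration of that step, with invented numbers standing in for the pipeline outputs:

# Standalone illustration of the selection step in analyze_text.
# The numbers are invented; in app.py each value is result[0]['score'] from a pipeline,
# and the second tuple element is the matching ferret Benchmark (strings here for brevity).
scores_labels = {
    "Sentiment":  (0.91, "sentiment_bench"),
    "Formality":  (0.78, "formality_bench"),
    "Toxicity":   (0.63, "toxicity_bench"),    # lowest score -> this aspect gets explained
    "Politeness": (0.88, "politeness_bench"),
}

# Same min(...) expression as in the commit: order the aspects by their stored score.
lowest_aspect = min(scores_labels, key=lambda aspect: scores_labels[aspect][0])
print(lowest_aspect)                 # Toxicity
print(scores_labels[lowest_aspect])  # (0.63, 'toxicity_bench')

One caveat when comparing the scores side by side: the toxic-bert pipeline is multi-label and is configured with function_to_apply='sigmoid' and top_k=None, so its [0] entry is the highest-scoring toxicity label. A low value there means the English translation looks clean, whereas for the other three classifiers a low value means the model is unsure of its single predicted label.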