karalif committed on
Commit
cd8fa87
verified
1 Parent(s): 546ed8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -55
app.py CHANGED
@@ -3,57 +3,32 @@ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassifica
3
  import gradio as gr
4
  from ferret import Benchmark
5
 
6
- # Load models and tokenizers
7
  sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
8
  sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
 
 
 
9
  formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
10
  formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
 
 
 
11
  toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
12
  toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
 
 
 
13
  politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
14
  politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
15
-
16
- # Initialize benchmarks
17
- sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
18
- formality_bench = Benchmark(formality_model, formality_tokenizer)
19
- toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
20
  politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
21
 
22
- # Translation pipeline
23
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
24
-
25
- def translate_text(text):
26
- translation = translator(text, max_length=512)
27
- return translation[0]['translation_text']
28
-
29
- def analyze_sentiment(text):
30
- sentiment_result = sentiment_classifier(text)
31
- return sentiment_result[0]['label'].replace("LABEL_", ""), sentiment_result[0]['score']
32
-
33
- def analyze_formality(text):
34
- formality_result = formality_classifier(text)
35
- return formality_result[0]['label'], formality_result[0]['score']
36
-
37
- def analyze_toxicity(text):
38
- toxicity_results = detoxify_pipeline(text)
39
- return '1' if toxicity_results[0]['score'] >= 0.5 else '0', toxicity_results[0]['score']
40
-
41
- def analyze_politeness(text):
42
- politeness_result = politeness_classifier(text)
43
- return politeness_result[0]['label'], politeness_result[0]['score']
44
-
45
- def replace_encoding(tokens):
46
- return [token.replace('臓', ' ') for token in tokens]
47
-
48
- def analyze_with_influence(text, bench):
49
- explanations = bench.explain(text, target=0) # Assume target=0 for binary classification; adjust if needed
50
- influential_words = []
51
- for explanation in explanations:
52
- if explanation.explainer == 'Partition SHAP':
53
- tokens = replace_encoding(explanation.tokens)
54
- influential_words.extend(tokens)
55
- influential_words_str = "; ".join(influential_words)
56
- return influential_words_str
57
 
58
  def replace_encoding(tokens):
59
  return [token.replace('臓', ' ')
@@ -75,12 +50,23 @@ def replace_encoding(tokens):
75
  .replace('脙墨', '脡')
76
  .replace('脙募', '媒')
77
  for token in tokens[1:-1]]
78
-
 
 
 
 
 
 
 
 
 
 
 
79
  def analyze_text(icelandic_text):
80
- translated_text = translate_text(icelandic_text)
81
  sentiment_label, sentiment_score = analyze_sentiment(icelandic_text)
82
  formality_label, formality_score = analyze_formality(icelandic_text)
83
- toxicity_results = analyze_toxicity(translated_text)
84
  politeness_label, politeness_score = analyze_politeness(translated_text)
85
 
86
  sentiment_influential_words = analyze_with_influence(icelandic_text, sentiment_bench)
@@ -88,18 +74,18 @@ def analyze_text(icelandic_text):
88
  toxicity_influential_words = analyze_with_influence(translated_text, toxicity_bench)
89
  politeness_influential_words = analyze_with_influence(translated_text, politeness_bench)
90
 
91
- analysis_results = (
92
- f"Translated Text: {translated_text}\n\n"
93
- f"Sentiment: {sentiment_label}, Score: {round(sentiment_score, 2)}\n"
94
- f"Influential Words in Sentiment: {sentiment_influential_words}\n"
95
- f"Formality: {formality_label}, Score: {round(formality_score, 2)}\n"
96
- f"Influential Words in Formality: {formality_influential_words}\n"
97
- f"Toxicity: Label: {'1' if toxicity_results['score'] >= 0.5 else '0'}, Score: {round(toxicity_results['score'], 2)}\n"
98
- f"Influential Words in Toxicity: {toxicity_influential_words}\n"
99
- f"Politeness: {politeness_label}, Score: {round(politeness_score, 2)}\n"
100
- f"Influential Words in Politeness: {politeness_influential_words}"
101
- )
102
- return analysis_results
103
 
104
  demo = gr.Interface(fn=analyze_text,
105
  inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),
 
3
  import gradio as gr
4
  from ferret import Benchmark
5
 
6
+ # Load models and tokenizers for sentiment analysis
7
  sentiment_tokenizer = AutoTokenizer.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
8
  sentiment_model = AutoModelForSequenceClassification.from_pretrained("Birkir/electra-base-igc-is-sentiment-analysis")
9
+ sentiment_bench = Benchmark(sentiment_model, sentiment_tokenizer)
10
+
11
+ # Load models and tokenizers for formality analysis
12
  formality_tokenizer = AutoTokenizer.from_pretrained("svanhvit/formality-classification-icebert")
13
  formality_model = AutoModelForSequenceClassification.from_pretrained("svanhvit/formality-classification-icebert")
14
+ formality_bench = Benchmark(formality_model, formality_tokenizer)
15
+
16
+ # Load models and tokenizers for toxicity analysis
17
  toxicity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
18
  toxicity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
19
+ toxicity_bench = Benchmark(toxicity_model, toxicity_tokenizer)
20
+
21
+ # Load models and tokenizers for politeness analysis
22
  politeness_tokenizer = AutoTokenizer.from_pretrained("Genius1237/xlm-roberta-large-tydip")
23
  politeness_model = AutoModelForSequenceClassification.from_pretrained("Genius1237/xlm-roberta-large-tydip")
 
 
 
 
 
24
  politeness_bench = Benchmark(politeness_model, politeness_tokenizer)
25
 
26
+ # Initialize pipelines for translation and classifiers
27
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-is-en")
28
+ sentiment_classifier = pipeline("text-classification", model="Birkir/electra-base-igc-is-sentiment-analysis")
29
+ formality_classifier = pipeline("text-classification", model="svanhvit/formality-classification-icebert")
30
+ detoxify_pipeline = pipeline('text-classification', model='unitary/toxic-bert', tokenizer='bert-base-uncased', function_to_apply='sigmoid', top_k=None)
31
+ politeness_classifier = pipeline("text-classification", model="Genius1237/xlm-roberta-large-tydip")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  def replace_encoding(tokens):
34
  return [token.replace('臓', ' ')
 
50
  .replace('脙墨', '脡')
51
  .replace('脙募', '媒')
52
  for token in tokens[1:-1]]
53
+
54
+ def analyze_with_influence(text, bench):
55
+ explanations = bench.explain(text, target=0)
56
+ influential_words = []
57
+ for explanation in explanations:
58
+ if explanation.explainer == 'Partition SHAP':
59
+ tokens = replace_encoding(explanation.tokens)
60
+ token_score_pairs = zip(tokens, explanation.scores)
61
+ influential_words.extend([(token, score) for token, score in token_score_pairs])
62
+ influential_words_str = "; ".join([f"{token} ({score:.2f})" for token, score in influential_words])
63
+ return influential_words_str
64
+
65
  def analyze_text(icelandic_text):
66
+ translated_text = translator(icelandic_text, max_length=512)[0]['translation_text']
67
  sentiment_label, sentiment_score = analyze_sentiment(icelandic_text)
68
  formality_label, formality_score = analyze_formality(icelandic_text)
69
+ toxicity_label, toxicity_score = analyze_toxicity(translated_text)
70
  politeness_label, politeness_score = analyze_politeness(translated_text)
71
 
72
  sentiment_influential_words = analyze_with_influence(icelandic_text, sentiment_bench)
 
74
  toxicity_influential_words = analyze_with_influence(translated_text, toxicity_bench)
75
  politeness_influential_words = analyze_with_influence(translated_text, politeness_bench)
76
 
77
+ analysis_results = f"""
78
+ Translated Text: {translated_text}\n\n
79
+ Sentiment: {sentiment_label}, Score: {round(sentiment_score, 2)}\n
80
+ Influential Words in Sentiment: {sentiment_influential_words}\n
81
+ Formality: {formality_label}, Score: {round(formality_score, 2)}\n
82
+ Influential Words in Formality: {formality_influential_words}\n
83
+ Toxicity: {toxicity_label}, Score: {round(toxicity_score, 2)}\n
84
+ Influential Words in Toxicity: {toxicity_influential_words}\n
85
+ Politeness: {politeness_label}, Score: {round(politeness_score, 2)}\n
86
+ Influential Words in Politeness: {politeness_influential_words}
87
+ """
88
+ return analysis_results.strip()
89
 
90
  demo = gr.Interface(fn=analyze_text,
91
  inputs=gr.Textbox(lines=2, placeholder="Enter Icelandic Text Here..."),