Spaces:
Runtime error
Runtime error
File size: 4,573 Bytes
f657d03 71775e2 f657d03 71775e2 97a23a7 71775e2 97a23a7 71775e2 f657d03 71775e2 97a23a7 71775e2 97a23a7 71775e2 f657d03 71775e2 f657d03 71775e2 f657d03 f1a2cd3 aa955f2 b2dc915 97a23a7 3318d6e 97a23a7 f657d03 b2dc915 49d0e05 b2dc915 49d0e05 aa955f2 f657d03 71775e2 f657d03 71775e2 f657d03 71775e2 97a23a7 71775e2 f657d03 71775e2 f657d03 b2dc915 49d0e05 f657d03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import datetime
import gradio as gr
from langdetect import detect, DetectorFactory, detect_langs
from transformers import pipeline
# Hugging Face model id of the zero-shot classifier used for each
# supported language, keyed by language code.
models = {
    "en": "Narsil/deberta-large-mnli-zero-cls",       # English
    "de": "Sahajtomar/German_Zeroshot",               # German
    "es": "Recognai/zeroshot_selectra_medium",        # Spanish
    "it": "joeddav/xlm-roberta-large-xnli",           # Italian
    "ru": "DeepPavlov/xlm-roberta-large-en-ru-mnli",  # Russian
    "no": "NbAiLab/nb-bert-base-mnli",                # Norwegian
}

# Hypothesis sentence for each language; the "{}" placeholder is where a
# candidate label is inserted. Keys mirror those of `models`.
hypothesis_templates = {
    "en": "This example is {}.",      # English
    "de": "Dieses beispiel ist {}.",  # German
    "es": "Este ejemplo es {}.",      # Spanish
    "it": "Questo esempio è {}.",     # Italian
    "ru": "Этот пример {}.",          # Russian
    "no": "Dette eksempelet er {}.",  # Norwegian
}
def detect_lang(sequence, labels):
    """Return the language code used to pick a model for ``sequence``.

    The language of ``sequence`` is detected with langdetect. Detection of
    ``labels`` is only used to log a warning when it disagrees with the
    sequence language. Detection is best effort: any failure is logged and
    the sequence language stays at whatever was determined so far
    (``'en'`` if nothing was detected).

    Args:
        sequence: Text to classify.
        labels: Raw comma-separated candidate labels as entered by the user.

    Returns:
        A key of ``models``; ``'en'`` when the detected language has no
        entry in ``models``.
    """
    # langdetect documents seeding the factory as the way to get
    # repeatable results from its otherwise non-deterministic detector.
    DetectorFactory.seed = 0
    seq_lang = 'en'
    # Stays None when label detection did not complete, so the mismatch
    # check below can be skipped instead of hitting an unbound name.
    lbl_lang = None

    try:
        seq_lang = detect(sequence)
        lbl_lang = detect(labels)
    except Exception:
        # Deliberately broad (but not bare): detection must never take the
        # request down, while KeyboardInterrupt/SystemExit still propagate.
        print("Language detection failed!",
              "Date:{}, Sequence:{}, Labels:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  labels))

    if lbl_lang is not None and seq_lang != lbl_lang:
        print("Different languages detected for sequence and labels!",
              "Date:{}, Sequence:{}, Labels:{}, Sequence Language:{}, Label Language:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  labels,
                  seq_lang,
                  lbl_lang))

    if seq_lang in models:
        print("Sequence Language detected.",
              "Date:{}, Sequence:{}, Sequence Language:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  seq_lang))
    else:
        print("Language not supported. Defaulting to English!",
              "Date:{}, Sequence:{}, Sequence Language:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  seq_lang))
        seq_lang = 'en'

    return seq_lang
def sequence_to_classify(sequence, labels):
    """Classify ``sequence`` against comma-separated candidate ``labels``.

    The language is chosen by ``detect_lang`` and selects both the model
    and the hypothesis template. Classification runs with
    ``multi_label=True``.

    Args:
        sequence: Text to classify.
        labels: Candidate labels separated by commas. Whitespace around
            each label is ignored and empty entries are dropped.

    Returns:
        A dict mapping each label returned by the classifier to its score
        as a ``float``, in the order the classifier returned them.

    Raises:
        ValueError: If ``labels`` contains no non-empty label.
    """
    # Trim each label so "a, b" does not produce the label " b", and drop
    # empties left behind by trailing or doubled commas.
    stripped = (part.strip() for part in str(labels).split(","))
    label_clean = [label for label in stripped if label]
    if not label_clean:
        # Fail early with a clear message, before any model is loaded.
        raise ValueError("At least one non-empty candidate label is required.")

    lang = detect_lang(sequence, labels)

    # NOTE(review): the pipeline is rebuilt on every call; consider caching
    # it per language if memory on the host allows.
    classifier = pipeline("zero-shot-classification",
                          hypothesis_template=hypothesis_templates[lang],
                          model=models[lang])

    response = classifier(sequence, label_clean, multi_label=True)

    predicted_labels = response['labels']
    predicted_scores = response['scores']
    # Pair labels with scores positionally without mutating the response.
    clean_output = {label: float(score)
                    for label, score in zip(predicted_labels, predicted_scores)}
    print("Date:{}, Sequence:{}, Labels: {}".format(
        str(datetime.datetime.now()),
        sequence,
        predicted_labels))

    return clean_output
# Example inputs shown in the UI. Each text is paired with a comma-separated
# string of candidate labels.
# Long texts use implicit string concatenation with explicit separating
# spaces; backslash continuations inside a literal join the lines with no
# space and glued words together (e.g. "hjem.Og", "hevetdet").
example_text1 = (
    "Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Most "
    "people who fall sick with COVID-19 will experience mild to moderate symptoms and recover without special treatment. "
    "However, some will become seriously ill and require medical attention."
)
example_labels1 = "business,health related,politics,climate change"

example_text2 = (
    "Ja, vi elsker dette landet, "
    "som det stiger frem, "
    "furet, værbitt over vannet, "
    "med de tusen hjem. "
    "Og som fedres kamp har hevet "
    "det av nød til seir"
)
example_labels2 = "helse,sport,religion,mat,patriotisme og nasjonalisme"

example_text3 = "Elephants are"
example_labels3 = "big,small,strong,fast,carnivorous"

example_text4 = "Dogs are man's best friend"
example_labels4 = "positive,negative,neutral"

example_text5 = "Amar sonar bangla ami tomay bhalobasi"
example_labels5 = "bhalo,kharap"
# Input widgets: a large box for the text and a smaller one for the
# comma-separated candidate labels.
_text_input = gr.inputs.Textbox(
    lines=20,
    label="Please enter the text you would like to classify...",
    placeholder="Text here...",
)
_labels_input = gr.inputs.Textbox(
    lines=5,
    label="Possible candidate labels (separated by comma)...",
    placeholder="Labels here separated by comma...",
)

# Output widget configured with num_top_classes=5.
_label_output = gr.outputs.Label(num_top_classes=5)

# Pre-filled (text, labels) pairs offered as examples in the UI.
_examples = [
    [example_text1, example_labels1],
    [example_text2, example_labels2],
    [example_text3, example_labels3],
    [example_text4, example_labels4],
    [example_text5, example_labels5],
]

# Wire the classifier function to the widgets and start the app.
iface = gr.Interface(
    fn=sequence_to_classify,
    inputs=[_text_input, _labels_input],
    outputs=_label_output,
    title="Multilingual Multi-label Zero-shot Classification",
    description="Currently supported languages are English, German, Spanish, Italian, Russian, Norsk.",
    capture_session=True,
    #interpretation="default",
    examples=_examples,
)
iface.launch()
|