from transformers import TextClassificationPipeline, AutoTokenizer, AutoModelForSequenceClassification from nooffense.sentence_encoder import SentenceEncoder import numpy as np import gradio as gr import os models = [ "Overfit-GM/bert-base-turkish-cased-offensive", "Overfit-GM/bert-base-turkish-uncased-offensive", "Overfit-GM/bert-base-turkish-128k-cased-offensive", "Overfit-GM/bert-base-turkish-128k-uncased-offensive", "Overfit-GM/convbert-base-turkish-mc4-cased-offensive", "Overfit-GM/convbert-base-turkish-mc4-uncased-offensive", "Overfit-GM/convbert-base-turkish-cased-offensive", "Overfit-GM/distilbert-base-turkish-cased-offensive", "Overfit-GM/electra-base-turkish-cased-discriminator-offensive", "Overfit-GM/electra-base-turkish-mc4-cased-discriminator-offensive", "Overfit-GM/electra-base-turkish-mc4-uncased-discriminator-offensive", "Overfit-GM/xlm-roberta-large-turkish-offensive", "Overfit-GM/mdeberta-v3-base-offensive" ] sentence_list = [] #global variable go brr def normalize_outputs(pred): values = np.asarray([p[1] for p in pred]) normalized = (values-min(values))/(max(values)-min(values)) new_preds = {p[0]:float(v) for p,v in zip(pred, normalized)} return new_preds def clear_sentences(): sentence_list.clear() return None def display_list(text): sentence_list.append(text) new_text = '\n'.join(sentence_list) return new_text def sentiment_analysis(text, model_choice): model = SentenceEncoder(models[model_choice]) pred = model.find_most_similar(text, sentence_list) return normalize_outputs(pred) with gr.Blocks() as embed_interface: gr.HTML("""