Job_sentiment_classifier / sentimentorr.py
sepehr's picture
Create sentimentorr.py
e335ec1
raw
history blame
1.83 kB
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.nn.functional import softmax as softmax
import numpy as np
import torch
# Hugging Face checkpoint shared by the tokenizer and the classifier, so the
# two can never be loaded from different models by accident.
_CHECKPOINT = "joeddav/distilbert-base-uncased-go-emotions-student"

# Both objects are created once at import time and reused by every call.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_CHECKPOINT)
# Emotion label names. sentimentor() pairs them positionally with the model's
# output scores, so the order must match the checkpoint's label order.
labels = [
    "admiration",
    "amusement",
    "anger",
    "annoyance",
    "approval",
    "caring",
    "confusion",
    "curiosity",
    "desire",
    "disappointment",
    "disapproval",
    "disgust",
    "embarrassment",
    "excitement",
    "fear",
    "gratitude",
    "grief",
    "joy",
    "love",
    "nervousness",
    "optimism",
    "pride",
    "realization",
    "relief",
    "remorse",
    "sadness",
    "surprise",
    "neutral",
]

# Column layout for a results table: one column per emotion plus a trailing
# "larg" column for the name of the winning label. Derived from ``labels``
# (as a new list) so the two can never drift apart.
labels7larg = labels + ["larg"]
def sentimentor(mmm):
    """Return the top-scoring emotion label for the first text in ``mmm``.

    Args:
        mmm: A single text or a collection of texts; anything ``pd.Series``
            accepts. Only the first element (by position) is classified,
            because a single label is returned.

    Returns:
        The entry of the module-level ``labels`` list whose score is highest
        for the first text.

    Raises:
        ValueError: If ``mmm`` contains no texts.
    """
    texts = pd.Series(mmm)
    if texts.empty:
        raise ValueError("sentimentor() needs at least one text to classify")

    # truncation=True clips the input to the tokenizer's maximum length;
    # without it an over-long text can fail inside the model's forward pass.
    encoded = tokenizer(
        texts.iloc[0],
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Pure inference: skip building an autograd graph.
    with torch.no_grad():
        # Output element 0 holds the scores; [0] selects the only batch row.
        scores = model(**encoded)[0][0]

    # Softmax is monotonic, so the arg-max of the raw scores is also the
    # arg-max of the probabilities; no normalisation is needed to pick a label.
    return labels[int(torch.argmax(scores))]