Priyanka-Kumavat
committed on
Commit
•
f35e382
1
Parent(s):
7f110ef
Upload 2 files
Browse files- app.py +97 -0
- clean_data.py +86 -0
app.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import AutoModelForSequenceClassification
|
2 |
+
from transformers import AutoTokenizer, AutoConfig
|
3 |
+
from clean_data import cleaned_complaints
|
4 |
+
import numpy as np
|
5 |
+
from scipy.special import softmax
|
6 |
+
import gradio as gr
|
7 |
+
from transformers import PegasusForConditionalGeneration
|
8 |
+
|
9 |
+
# Preprocess text (username and link placeholders)
def preprocess(text):
    """Mask user mentions as '@user' and URLs as 'http', token by token.

    Tokens are produced by splitting on single spaces, so spacing is
    preserved exactly on re-join.
    """
    def _mask(token):
        # A lone '@' is left untouched; only '@name' becomes the placeholder.
        if token.startswith('@') and len(token) > 1:
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_mask(tok) for tok in text.split(" "))
|
17 |
+
|
18 |
+
# load model
# Fetch the fine-tuned RoBERTa complaint classifier plus its tokenizer and
# config from the Hugging Face Hub once at startup; reused for every request.
# (Was an f-string with no placeholders and carried dead commented-out code.)
MODEL = "ThirdEyeData/Complaints_Roberta"
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
|
27 |
+
|
28 |
+
# create classifier function
def classify_compliant(text):
    """Categorize a customer complaint into one of the trained labels.

    Parameters
    ----------
    text : str
        Raw complaint text.

    Returns
    -------
    str
        The predicted label name, or "Cannot Categorize the Complaint" when
        cleaning leaves fewer than 3 characters of usable text.
    """
    text = cleaned_complaints(text)
    if len(text) < 3:
        return "Cannot Categorize the Complaint"

    text = preprocess(text)
    encoded_input = tokenizer(text, return_tensors='pt')
    output = model(**encoded_input)
    scores = softmax(output[0][0].detach().numpy())

    # Only the top label is reported, so take argmax directly instead of the
    # original full argsort + reverse; the unused `probs` dict was dropped.
    top_id = int(np.argmax(scores))
    return config.id2label[top_id]
|
50 |
+
|
51 |
+
|
52 |
+
# Build the Gradio app: static copy (title/description/article), the
# Interface wiring, and the script entry point.
title = "Customer Complaints Categorization"
description = """
This application uses fine-tuned Roberta to perform Customer Complaints Categorization. Roberta is a popular pre-trained language model that can be used for a variety of natural language processing tasks. This text classification model helps the company to categorize incoming support requests submitted by users and determine the appropriate course of action. The application can provide an opportunity for the service provider to resolve the customer’s problems on time and therefore, reduce dissatisfaction levels.


Write a complaint on an insurance product or service and see how the machine learning model is able to categorize your Complaint. Below is the type in which the complaints are segmented:
1. Debt Collection
2. False Claim or Statement
3. Legal Issue
4. Improper contact or sharing of info
5. Follow Up Issue
"""
article = """
- Click submit button to test Consumer Complaint Segmentation
- Click the clear button to refresh the text
- This application has a linked model https://huggingface.co/ThirdEyeData/Complaints_Roberta
"""

demo = gr.Interface(
    classify_compliant,
    inputs=gr.Textbox(lines=10, label="Type your Complaint of our Product here or for a quick demo click on the examples provided below and output will automatically be populated in the output box ", max_lines=20),
    outputs=gr.Textbox(lines=5, label="Complaint Category"),
    title=title,
    description=description,
    article=article,
    live=False,
    # Fixed: the original passed `cache_example`, which is not a valid
    # gr.Interface keyword — the parameter is `cache_examples`.
    cache_examples=False,
    examples=[
        ["""The day before my Salliemae student loan payment was due I contacted a rep to discuss the impact on my account of making my payment at the end of the month rather than the middle for just that one month.
The rep indicated it would be no problem, but that I still may get a call each day from Salliemae until I made my payment. I understood, requested my account be notated accordingly, and hung up. For two weeks I endured numerous calls per day ;
I lost count at six calls one day, which was the norm for the number of calls Salliemae made in an effort to collect a debt that had a due date that had been arranged and had not come up yet. """],
        ["""The representative told me the total amount due was {$2100.00} and that I can settle for half of that amount. Unfortunately, I was unable to accept the settlement but began to question the amount because my last statement was {$1800.00} and
there was nothing written in the contract for additional interest charges should my account go into collection.
I told the representative that I will pay the amount actually owed and I want to make a payment arrangement. She told me I can't just do what I want,
If I want to pay the original amount due, it has to be paid in full. I told her that that is not fair debt collection practice and that I am only contractually obligated to the {$1800.00} and we can set up an arrangement from that. """],
        ["""This debt is beyond the Maryland Statute of Limitations. It is illegal for a debt collector to collect on an expired debt. They have taken illegal action by seizing my Maryland State Refund when the debt had already expired and beyond the Statute of Limitation which is 3 years in the state of Maryland"""],
        ["""The company has been calling my employer in an attempt to collect a debt. When I spoke with them and informed them that this was not an appropriate number to call. I asked what company they were calling from and a phone number so he told me the company name, but the man on the phone would not give me his name or a phone number.
I had mailed a letter requesting verification a few weeks ago and hadn't received anything back. In the letter I specifically requested that all communication be done through mail."""],
        [""" I do n't think I chose the correct issue above, however I think it is closest to my issue. I have a record on my credit report that I have disputed through both the company and the credit bureaus. The dispute is marked as being disputed by me on my report, but it was not removed despite the creditor not sending me verification of this debt.
I do not even know what this debt is for.I have tried contacting the collection agency by mail to obtain verification with no response and they will not remove the item from my report."""],
    ],
)

if __name__ == "__main__":
    demo.launch()
|
clean_data.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import nltk
|
2 |
+
from nltk.corpus import stopwords
|
3 |
+
from nltk.stem import WordNetLemmatizer
|
4 |
+
import warnings
|
5 |
+
import re
|
6 |
+
# One-time corpus downloads required by the cleaning pipeline (no-ops when
# already cached locally): stopword list, WordNet data for lemmatization,
# and the English word corpus.
nltk.download("stopwords")
nltk.download("wordnet")
nltk.download("words")
# Shared WordNet lemmatizer used to reduce tokens to their lemma form.
lemmatizer = WordNetLemmatizer()

# English stopwords kept in a set for O(1) membership tests while filtering.
stop_words = set(stopwords.words('english'))
|
12 |
+
|
13 |
+
# Map of English contractions to their expanded forms, used to normalize
# complaint text before tokenization. Keys are matched against
# space-separated tokens, so punctuation-attached contractions (e.g. "can't,")
# will not match — NOTE(review): presumably acceptable for this pipeline.
contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have", "couldn't": "could not",
                       "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", "haven't": "have not",
                       "he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is",
                       "I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would",
                       "i'd've": "i would have", "i'll": "i will", "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would",
                       "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have","it's": "it is", "let's": "let us", "ma'am": "madam",
                       "mayn't": "may not", "might've": "might have","mightn't": "might not","mightn't've": "might not have", "must've": "must have",
                       "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have","o'clock": "of the clock",
                       "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have",
                       "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is",
                       "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have","so's": "so as",
                       "this's": "this is","that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would",
                       "there'd've": "there would have", "there's": "there is", "here's": "here is","they'd": "they would", "they'd've": "they would have",
                       "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have",
                       "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are",
                       "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have", "what're": "what are",
                       "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is",
                       "where've": "where have", "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have",
                       "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have",
                       "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all",
                       "y'all'd": "you all would","y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have",
                       "you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have",
                       "you're": "you are", "you've": "you have"}
|
36 |
+
|
37 |
+
def cleaned_complaints(text):
    """Normalize a raw complaint string for classification.

    Pipeline: strip @mentions and '#' symbols, expand contractions, drop
    URLs and possessive 's, keep letters only, lowercase, remove stopwords,
    keep only recognized English words, then lemmatize.

    Parameters
    ----------
    text : str
        Raw complaint text.

    Returns
    -------
    str
        Space-separated cleaned tokens; may be the empty string.

    Notes
    -----
    Uses the module-level ``lemmatizer``, ``stop_words`` and
    ``contraction_mapping`` — the original redefined identical copies of all
    three inside the function (and re-imported nltk/re), which was redundant.
    """
    # Only needed here; the module top level does not import the words corpus.
    from nltk.corpus import words

    # Performance fix: the original evaluated `x in words.words()` per token,
    # re-materializing and linearly scanning a ~236k-entry list every time.
    # Build the vocabulary set once and cache it on the function object.
    if not hasattr(cleaned_complaints, "_vocab"):
        cleaned_complaints._vocab = set(words.words())
    vocab = cleaned_complaints._vocab

    newString = re.sub(r'@[A-Za-z0-9]+', '', text)      # removing user mentions
    newString = re.sub("#", "", newString)              # removing hashtag symbol
    # contraction mapping (token-wise, split on single spaces)
    newString = ' '.join([contraction_mapping[t] if t in contraction_mapping else t
                          for t in newString.split(" ")])
    newString = re.sub(r'http\S+', '', newString)       # removing links
    newString = re.sub(r"'s\b", "", newString)          # removing 's
    letters_only = re.sub("[^a-zA-Z]", " ", newString)  # fetching out only letters
    lower_case = letters_only.lower()                   # converting all words to lowercase
    tokens = [w for w in lower_case.split() if w not in stop_words]  # stopwords removal
    tokens = [x for x in tokens if x in vocab]          # keep recognized English words

    # lemmatize and re-join; equivalent to the original accumulate-then-strip
    return ' '.join(lemmatizer.lemmatize(i) for i in tokens)
|