Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,22 +2,21 @@ from sklearn.model_selection import train_test_split
|
|
2 |
import torch
|
3 |
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
|
4 |
|
|
|
|
|
|
|
|
|
5 |
# Load multilingual BERT tokenizer
|
6 |
tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")
|
7 |
|
8 |
-
#
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
"label": "compliant"},
|
13 |
-
{"customer_input": "मेरा ऑर्डर देरी से आ रहा है, मुझे क्या करना चाहिए?",
|
14 |
-
"agent_response": "कृपया धैर्य रखें, हम आपकी समस्या को जल्द हल करेंगे।",
|
15 |
-
"label": "non-compliant"},
|
16 |
-
# Add more examples as needed
|
17 |
-
]
|
18 |
|
19 |
# Split dataset into training and evaluation sets
|
20 |
train_data, eval_data = train_test_split(dataset, test_size=0.2)
|
|
|
21 |
|
22 |
# Tokenizer function that also keeps the label in the dataset
|
23 |
def tokenize_function(example):
|
@@ -94,7 +93,7 @@ def check_compliance(customer_input, agent_response):
|
|
94 |
return "Non-Compliant"
|
95 |
|
96 |
# Test the model with new data
|
97 |
-
test_customer_input = "
|
98 |
-
test_agent_response = "
|
99 |
result = check_compliance(test_customer_input, test_agent_response)
|
100 |
print(result)
|
|
|
2 |
import torch
|
3 |
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
|
4 |
|
5 |
+
|
6 |
+
# Load the CSV file into a pandas DataFrame
|
7 |
+
dataset = pd.read_csv('customer_address_compliance_scenarios.csv')
|
8 |
+
|
9 |
# Load multilingual BERT tokenizer
|
10 |
tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")
|
11 |
|
12 |
+
# Preprocess the data
|
13 |
+
# dataset = dataset.dropna()  # Optional: drop rows with missing values
|
14 |
+
# X = dataset.drop(columns=['target_column'])  # Features ('target_column' is a placeholder for the label column)
|
15 |
+
# y = dataset['target_column']  # Target variable
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# Split dataset into training and evaluation sets
|
18 |
train_data, eval_data = train_test_split(dataset, test_size=0.2)
|
19 |
+
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
20 |
|
21 |
# Tokenizer function that also keeps the label in the dataset
|
22 |
def tokenize_function(example):
|
|
|
93 |
return "Non-Compliant"
|
94 |
|
95 |
# Test the model with new data
|
96 |
+
test_customer_input = ""
|
97 |
+
test_agent_response = "Is this your address ?"
|
98 |
result = check_compliance(test_customer_input, test_agent_response)
|
99 |
print(result)
|