Spaces:

ketanchaudhary88
/

Bert

Runtime error

App Files Files Community

ketanchaudhary88 committed on Nov 17, 2024

Commit

5bb9c5d

verified ·

1 Parent(s): ba7202c

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -12

app.py CHANGED Viewed

@@ -2,22 +2,21 @@ from sklearn.model_selection import train_test_split
 import torch
 from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
 # Load multilingual BERT tokenizer
 tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")
-# Example dataset in Hindi
-dataset = [
-    {"customer_input": "मैंने गलत सामान प्राप्त किया है, क्या मुझे रिफंड मिल सकता है?",
-     "agent_response": "मुझे खेद है कि आपको परेशानी हो रही है। कृपया अपना ऑर्डर नंबर प्रदान करें ताकि मैं आपकी सहायता कर सकूं।",
-     "label": "compliant"},
-    {"customer_input": "मेरा ऑर्डर देरी से आ रहा है, मुझे क्या करना चाहिए?",
-     "agent_response": "कृपया धैर्य रखें, हम आपकी समस्या को जल्द हल करेंगे।",
-     "label": "non-compliant"},
-    # Add more examples as needed
-]
 # Split dataset into training and evaluation sets
 train_data, eval_data = train_test_split(dataset, test_size=0.2)
 # Tokenizer function that also keeps the label in the dataset
 def tokenize_function(example):
@@ -94,7 +93,7 @@ def check_compliance(customer_input, agent_response):
         return "Non-Compliant"
 # Test the model with new data
-test_customer_input = "मेरे पास अकाउंट एक्सेस नहीं हो रहा है। क्या आप मेरी मदद कर सकते हैं?"
-test_agent_response = "मुझे खेद है। कृपया अपना उपयोगकर्ता नाम साझा करें, ताकि मैं आपकी सहायता कर सकूं।"
 result = check_compliance(test_customer_input, test_agent_response)
 print(result)

 import torch
 from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
+# Load the CSV file into a pandas DataFrame
+dataset = pd.read_csv('customer_address_compliance_scenarios.csv')
 # Load multilingual BERT tokenizer
 tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")
+# Preprocess the data
+# df = df.dropna()  # Optional: Drop rows with missing values
+#X = df.drop(columns=['target_column'])  # Features
+#y = df['target_column']  # Target variable
 # Split dataset into training and evaluation sets
 train_data, eval_data = train_test_split(dataset, test_size=0.2)
+#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 # Tokenizer function that also keeps the label in the dataset
 def tokenize_function(example):
         return "Non-Compliant"
 # Test the model with new data
+test_customer_input = ""
+test_agent_response = "Is this your address ?"
 result = check_compliance(test_customer_input, test_agent_response)
 print(result)