In [3]:
import re
import unicodedata

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Load the saved model and tokenizer from the local checkpoint directory.
# `tokenizer` and `model` are module-level globals that predict() reads.
model_path = "./results/best_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForTokenClassification.from_pretrained(model_path)

# Canonical service name -> aliases (English and Vietnamese, with and without
# diacritics) that resolve to it. Dict order is the match priority:
# map_service returns the first key that has a matching alias.
service_mapping = {
    "hotel": ["hotel", "hotels", "khách sạn", "khach san", "ks"],
    "flight": ["flight", "flights", "vé máy bay", "máy bay", "may bay"],
    "car rental": ["car rental", "car rentals", "thuê xe", "xe"],
    "ticket": ["ticket", "tickets", "vé", "vé tham quan", "ve", "ve tham quan"],
    "tour": ["tour", "tours", "du lịch", "du lich"],
}

# Define id2label mapping
id2label = {0: "O", 1: "B-SERVICE", 2: "I-SERVICE", 3: "B-LOCATION", 4: "I-LOCATION"}


def map_service(service):
    """Map a free-text service mention to its canonical service name.

    The mention is lower-cased and Unicode-normalised (NFC), then checked
    against every alias in ``service_mapping``. An alias only counts when it
    appears as a whole word or phrase, so short aliases such as "ks", "xe" or
    "ve" no longer fire inside unrelated words ("thanks", "luxe", "travel").

    Args:
        service: Raw text of a SERVICE span, e.g. "tour du lich".

    Returns:
        The first key of ``service_mapping`` (in dict order) that has a
        matching alias, or ``None`` when nothing matches.
    """
    # Vietnamese text arrives either precomposed or as base letter + combining
    # marks; normalising both sides to NFC makes the two forms compare equal.
    normalized = unicodedata.normalize("NFC", service).lower()
    for key, values in service_mapping.items():
        for alias in values:
            alias_nfc = unicodedata.normalize("NFC", alias)
            # Lookarounds instead of \b so that attached punctuation
            # ("hotel,") still counts as a word boundary.
            if re.search(rf"(?<!\w){re.escape(alias_nfc)}(?!\w)", normalized):
                return key
    return None

def _flush_entity(entities, entity_type, tokens):
    """Store the finished span ``tokens`` under ``entities[entity_type]``.

    SERVICE spans are stored as the canonical name returned by
    ``map_service`` and are dropped when no alias matches; every other type
    keeps the raw, space-joined text.
    """
    surface = " ".join(tokens)
    if entity_type == "SERVICE":
        canonical = map_service(surface)
        if canonical:
            entities[entity_type].append(canonical)
    else:
        entities[entity_type].append(surface)


def _first_token_labels(word_ids, token_labels):
    """Return ``{tokenizer word index: label of that word's first sub-token}``."""
    first_labels = {}
    for word_id, label in zip(word_ids, token_labels):
        # word_id is None for special tokens; later sub-tokens of a word are ignored.
        if word_id is not None and word_id not in first_labels:
            first_labels[word_id] = label
    return first_labels


def _tokenizer_word_at(inputs, start, end):
    """Return the tokenizer word index of the first mapped character in ``text[start:end]``.

    Returns ``None`` when no character in the range maps to a token (for
    example because the text was truncated).
    """
    for char_index in range(start, end):
        word_id = inputs.char_to_word(char_index)
        if word_id is not None:
            return word_id
    return None


def predict(text):
    """Extract SERVICE and LOCATION entities from ``text``.

    The text is tokenized and run through the module-level ``model``. Each
    whitespace-separated word receives the label predicted for the first
    sub-token of the tokenizer word at its first mapped character, and B-/I-
    tagged runs of words are then merged into entity spans.

    Words are located through character offsets rather than by zipping
    ``text.split()`` against the tokenizer's word indices: a tokenizer may
    split punctuation into words of its own, which would shift every later
    label onto the wrong word.

    Requires a fast tokenizer, because ``word_ids`` and ``char_to_word`` are
    only available on fast-tokenizer encodings.

    Args:
        text: Raw input sentence.

    Returns:
        ``{"SERVICE": [...], "LOCATION": [...]}``. SERVICE holds at most one
        canonical service name (the first one found; spans that
        ``map_service`` cannot map are dropped). LOCATION holds the raw text
        of every location span, in order of appearance.
    """
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=2)
    predicted_labels = [id2label[p.item()] for p in predictions[0]]
    first_labels = _first_token_labels(inputs.word_ids(), predicted_labels)

    # Extract entities
    entities = {"SERVICE": [], "LOCATION": []}
    current_entity = None
    current_tokens = []
    cursor = 0
    for word in text.split():
        # Only whitespace lies between `cursor` and the next word, so the
        # first match at or after `cursor` is exactly this word's position.
        start = text.index(word, cursor)
        cursor = start + len(word)
        # Words with no token (e.g. cut off by truncation) count as "O".
        label = first_labels.get(_tokenizer_word_at(inputs, start, cursor), "O")

        if label.startswith("B-"):
            if current_entity:
                _flush_entity(entities, current_entity, current_tokens)
            current_entity = label[2:]
            current_tokens = [word]
        elif label.startswith("I-") and current_entity:
            current_tokens.append(word)
        else:
            if current_entity:
                _flush_entity(entities, current_entity, current_tokens)
            current_entity = None
            current_tokens = []

    # Flush a span that runs to the end of the sentence.
    if current_entity:
        _flush_entity(entities, current_entity, current_tokens)

    # Only the first recognised service is reported.
    if entities["SERVICE"]:
        entities["SERVICE"] = [entities["SERVICE"][0]]

    return entities

# Test function
def test_ner(text):
    """Print ``text`` and the entities ``predict`` extracts from it, then return them."""
    print(f"Input: {text}")
    entities = predict(text)
    print("Output:", entities)
    return entities

In [13]:
# Sample queries to run through the NER pipeline; add more strings to try other inputs.
test_texts = [
 "tour du lich gia re da lat"
]

# Run test_ner on every sample; the bare print() leaves a blank line after each result.
for text in test_texts:
 test_ner(text)
 print()

Input: tour du lich gia re da lat
Output: {'SERVICE': ['tour'], 'LOCATION': ['da lat']}

