"""Gradio hotel search app.

Embeds a free-text query with a SentenceTransformer model, ranks the
pre-computed hotel embeddings by cosine similarity, and shows the best
matches together with a BART-generated summary of each hotel's details.
"""

import logging
import pickle

import gradio as gr
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

logger = logging.getLogger(__name__)

# Load the pre-trained embedding model (SentenceTransformer).
# NOTE(review): the model card for this checkpoint asks for
# trust_remote_code=True; confirm this loads with the installed
# sentence-transformers version before deploying.
model = SentenceTransformer('nomic-ai/nomic-embed-text-v1')

# Load BART summarization model and tokenizer.
# NOTE(review): the model comes from fairseq while the tokenizer comes from
# Hugging Face. fairseq's hub interface does not obviously share the
# Hugging Face generate() keyword arguments / return type used in
# summarize_text() -- verify this pairing actually produces summaries.
model_bart = torch.hub.load('pytorch/fairseq', 'bart.large.cnn')
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'facebook/bart-large-cnn')

# Load normalized embeddings from the pkl file.
# SECURITY: pickle.load executes arbitrary code from the file; only ever
# load a file produced by a trusted pipeline.
# Presumably a 2-D array with one row per hotel, in the same order as the
# CSV below -- TODO confirm against the script that produced it.
with open('normalized_embeddings.pkl', 'rb') as f:
    normalized_embeddings = pickle.load(f)

# Load the hotel dataset (processed)
df_copy_first_1000 = pd.read_csv('hotel_dataset_processed.csv')


def summarize_text(text):
    """Return a summary of *text* produced by the module-level BART model.

    The text is tokenized (truncated to at most 1024 tokens), passed to
    ``model_bart.generate`` with beam-search settings, and the first
    returned sequence is decoded with special tokens skipped.
    """
    inputs = tokenizer.encode(text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model_bart.generate(
        inputs,
        max_length=150,
        min_length=30,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


def _format_hotel(row):
    """Build the display text (details plus summary) for one hotel row."""
    summary_text = (
        f"Description: {row['hotel_description']}\n"
        f"Review Title: {row['review_title']}\n"
        f"Review Text: {row['review_text']}\n"
        f"Review Count: {row['review_count']}"
    )
    summary = summarize_text(summary_text)
    return (
        f"Hotel Name: {row['hotel_name']}\n"
        f"Locality: {row['locality']}\n"
        f"Price Range: {row['price_range']}\n"
        f"Rate: {row['rate']}\n"
        f"\nSummary:\n{summary}\n"
    )


def search_hotels(query_text, k=1):
    """Return formatted details for the *k* hotels most similar to the query.

    Args:
        query_text: Free-text search query.
        k: Number of hotels to return. Coerced to ``int`` and clamped to
            ``[1, number of stored embeddings]``.

    Returns:
        One text block per hotel, best match first, joined by blank lines.
        A prompt message is returned for a blank query, and an error
        message (never an exception) if anything fails, so the UI always
        has text to display.
    """
    if query_text is None or (isinstance(query_text, str) and not query_text.strip()):
        return "Please enter a search query."

    try:
        # Encode the query and L2-normalize it as a single-row matrix.
        query_embedding = model.encode(query_text, convert_to_tensor=True)
        query_embedding = query_embedding.cpu().numpy().reshape(1, -1)
        query_embedding = normalize(query_embedding, norm='l2')

        # Similarity of the query against every stored embedding.
        similarities = cosine_similarity(query_embedding, normalized_embeddings)
        scores = similarities[0]

        # Clamp k: with k == 0 the slice [-k:] would be [-0:], i.e. the
        # whole array, and a negative k would drop the best matches.
        k = max(1, min(int(k), len(scores)))

        # argsort is ascending, so take the last k and reverse them.
        top_indices = scores.argsort()[-k:][::-1]
        top_hotels = df_copy_first_1000.iloc[top_indices]

        results = [_format_hotel(row) for _, row in top_hotels.iterrows()]
        return "\n\n".join(results)
    except Exception as e:
        # UI boundary: keep the traceback in the logs, show a short message.
        logger.exception("Hotel search failed")
        return f"An error occurred during the search: {e}"


# Gradio Interface
iface = gr.Interface(
    fn=search_hotels,
    inputs=gr.Textbox(label="Enter your search query"),
    outputs="text",
    title="Hotel Search Engine",
    description="Enter a query to search for hotels and get details about the top results.",
)

# Launch Gradio Interface
iface.launch()