File size: 3,093 Bytes
879567b
 
 
e5e8c50
879567b
 
e5e8c50
 
879567b
e5e8c50
 
 
 
 
879567b
e5e8c50
 
c834b91
e5e8c50
 
c834b91
e5e8c50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
879567b
 
 
 
e5e8c50
879567b
 
 
 
 
 
 
 
 
 
 
 
 
e5e8c50
879567b
 
 
e5e8c50
879567b
 
 
 
 
 
e5e8c50
 
879567b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import html
from urllib.parse import quote_plus

import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load your model
# Sentence-embedding model used to encode search queries; it is created once
# at import time and handed to search_in_combined by search_interface.
model = SentenceTransformer('sentence-transformers/distiluse-base-multilingual-cased-v2')

# Load embeddings and DataFrames (replace these with your paths)
# search_in_combined uses positions in each similarity vector as row positions
# (``iloc``) in the matching DataFrame, so embeddings_hotels must be
# row-aligned with hotels.csv and embeddings_ar with arabic_data.csv.
# NOTE(review): presumably both arrays were produced with the same model as
# above -- confirm, otherwise the cosine similarities are not comparable.
embeddings_hotels = np.load("embeddings_hotels.npy")
embeddings_ar = np.load("embeddings_ar.npy")
df_hotels = pd.read_csv("hotels.csv")
df_ar = pd.read_csv("arabic_data.csv")

def search_in_combined(query_text, model, k=5):
    """Return the ``k`` rows most similar to ``query_text`` across both datasets.

    The query is embedded with ``model`` and compared by cosine similarity
    against the module-level ``embeddings_hotels`` and ``embeddings_ar``
    arrays. The best ``k`` rows of ``df_hotels`` and of ``df_ar`` are each
    tagged with a ``similarity`` column, concatenated, sorted by similarity
    (highest first) and trimmed to ``k`` rows overall.

    Args:
        query_text: Text to search for.
        model: Encoder whose ``encode(text, convert_to_tensor=True)`` result
            supports ``.cpu().numpy()``.
        k: Number of rows to take from each source and to return in total.

    Returns:
        A ``pandas.DataFrame`` holding the columns of both sources plus
        ``similarity`` and ``google_maps_url``. The URL is an empty string
        for rows that have no ``hotel_name``.
    """
    query_embedding = (
        model.encode(query_text, convert_to_tensor=True).cpu().numpy().reshape(1, -1)
    )

    similarities_hotels = cosine_similarity(query_embedding, embeddings_hotels).flatten()
    similarities_ar = cosine_similarity(query_embedding, embeddings_ar).flatten()

    # argsort is ascending, so reverse it before taking the first k positions.
    top_indices_hotels = np.argsort(similarities_hotels)[::-1][:k]
    top_indices_ar = np.argsort(similarities_ar)[::-1][:k]

    # Copy so that adding the similarity column never touches the source frames.
    top_hotels = df_hotels.iloc[top_indices_hotels].copy()
    top_ar = df_ar.iloc[top_indices_ar].copy()

    top_hotels['similarity'] = similarities_hotels[top_indices_hotels]
    top_ar['similarity'] = similarities_ar[top_indices_ar]

    combined_top_results = pd.concat([top_hotels, top_ar], ignore_index=True)
    combined_top_results = combined_top_results.sort_values(by='similarity', ascending=False)

    maps_search_url = "https://www.google.com/maps/search/?api=1&query="
    if 'hotel_name' in combined_top_results.columns:
        # quote_plus percent-encodes reserved characters ('&', '#', '+', ...)
        # and non-ASCII text that a bare space->'+' replacement would leave
        # in the query string; str() keeps non-string names from raising.
        combined_top_results['google_maps_url'] = [
            '' if pd.isna(name) else f"{maps_search_url}{quote_plus(str(name))}"
            for name in combined_top_results['hotel_name']
        ]
    else:
        combined_top_results['google_maps_url'] = ''

    return combined_top_results.head(k)

def _is_missing(value):
    """Return True when ``value`` is None or a pandas/NumPy missing scalar."""
    if value is None:
        return True
    try:
        return bool(pd.isna(value))
    except (TypeError, ValueError):
        # Non-scalar values (lists, arrays) have no single missing flag.
        return False


def _display_text(value):
    """Return ``value`` as HTML-escaped text, or 'N/A' when it is missing."""
    if _is_missing(value):
        return "N/A"
    return html.escape(str(value), quote=True)


def format_results(results):
    """Render search results as an HTML fragment.

    Rows with a non-missing ``hotel_name`` are rendered as hotel cards (name,
    description, Google Maps link, image); every other row is rendered with
    its ``name`` and ``rating``. Missing or NaN fields are shown as ``N/A``.

    Args:
        results: ``pandas.DataFrame`` of search hits. All columns are
            optional.

    Returns:
        One HTML string with the per-row fragments joined by ``<br><br>``;
        an empty string when ``results`` has no rows.
    """
    formatted_results = []
    for _, row in results.iterrows():
        # No default here: a frame without a hotel_name column must fall
        # through to the generic branch instead of becoming an "N/A" hotel.
        hotel_name = row.get('hotel_name')
        if not _is_missing(hotel_name):
            # quote_plus output contains no quotes or angle brackets, so the
            # URL is safe inside the single-quoted href attribute.
            google_maps_url = (
                "https://www.google.com/maps/search/?api=1&query="
                + quote_plus(str(hotel_name))
            )
            hotel_image = row.get('hotel_image')
            if _is_missing(hotel_image):
                # Avoid emitting a broken <img> pointing at 'N/A' or 'nan'.
                image_html = "N/A"
            else:
                image_src = html.escape(str(hotel_image), quote=True)
                image_html = f"<img src='{image_src}' width='200' />"
            description = _display_text(row.get('hotel_description'))
            result = (
                f"<b>Hotel Name</b>: {_display_text(hotel_name)}<br>"
                f"<b>Description</b>: {description}<br>"
                f"<b>Google Maps</b>: <a href='{google_maps_url}' target='_blank'>View on Maps</a><br>"
                f"<b>Image</b>: {image_html}<br>"
            )
        else:
            result = (
                f"<b>Name</b>: {_display_text(row.get('name'))}<br>"
                f"<b>Rating</b>: {_display_text(row.get('rating'))}<br>"
            )
        formatted_results.append(result)

    return "<br><br>".join(formatted_results)

def search_interface(query_text):
    """Gradio callback: run the combined search and render the hits as HTML.

    Args:
        query_text: Raw text from the query box.

    Returns:
        The HTML produced by ``format_results`` for the top five matches, or
        a short prompt when the query is ``None``, empty or only whitespace.
    """
    # A blank query would still be embedded and would return five arbitrary
    # rows, so ask for input instead of showing meaningless matches.
    if query_text is None or not str(query_text).strip():
        return "<i>Please enter a search query.</i>"
    results = search_in_combined(query_text, model, k=5)
    return format_results(results)

# Gradio UI: a single text box feeds search_interface and the HTML string it
# returns is shown in an HTML output component.
iface = gr.Interface(
    fn=search_interface,
    inputs=gr.Textbox(label="Enter your search query"),
    outputs=gr.HTML(label="Search Results"),
    title="Hotel and Arabic Data Search",
    description="Search for hotels or Arabic data. Results will show the top matches.",
    examples=["Riyadh", "Deluxe Room"]
)

# Start the Gradio app.
# NOTE(review): this call is not behind an ``if __name__ == "__main__":``
# guard, so it also runs when the module is imported -- confirm that is intended.
iface.launch()