Spaces:
Sleeping
Sleeping
File size: 4,615 Bytes
a0c9ed0 1d1ff21 7b908e9 3a69eda 4a05925 3a69eda 03fa2bd 3a69eda 1d1ff21 8b75361 a0c9ed0 4e275b4 228303a 1d1ff21 228303a 1d1ff21 228303a 1d1ff21 228303a 1d1ff21 228303a a0c9ed0 1d1ff21 efd1d8f 1d1ff21 228303a 1d1ff21 fb31e25 1d1ff21 fb31e25 229c21c 228303a 1d1ff21 3a69eda 228303a 3a69eda 1d1ff21 0f7292c 3a69eda 0f7292c 3a69eda 0f7292c a0c9ed0 1d1ff21 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import pandas as pd
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize
import nltk
from openai.embeddings_utils import cosine_similarity
import spacy
from spacy.cli import download
import os
import openai
# --- One-time setup (runs at import) ---

# Load spaCy's small English pipeline; download it on the first run.
try:
    nlp = spacy.load('en_core_web_sm')
except IOError:
    download('en_core_web_sm')
    nlp = spacy.load('en_core_web_sm')

# Legacy (openai<1.0) module-level auth: key read from the environment.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Tokenizer data required by nltk.word_tokenize (used in bm25_rank).
nltk.download('punkt')
nltk.download('punkt_tab')

# Pre-processed hotel corpus. Presumably includes the columns accessed below
# ('combined', 'embeddings', 'locality', 'hotel_name', 'hotel_image', 'rate',
# 'hotel_description', 'review_text') -- TODO confirm against the pickle.
df = pd.read_pickle("hotels_data.pkl")

# Dense-retrieval embedding model; search() reshapes against 768 dims,
# so the model is assumed to emit 768-dim vectors -- TODO confirm.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
def bm25_rank(query, df, n=15):
    """Return the top-`n` rows of `df` ranked by BM25 relevance to `query`.

    Args:
        query: Free-text search query.
        df: DataFrame with a 'combined' text column to rank against.
        n: Number of top-scoring rows to keep (default 15).

    Returns:
        A new DataFrame of the n best-matching rows with an added
        'bm25_scores' column; the input `df` is left unmodified.
    """
    # NOTE(review): the BM25 index is rebuilt on every call; for a static
    # corpus it could be built once at module load time.
    tokenized_corpus = [word_tokenize(doc.lower()) for doc in df['combined']]
    bm25 = BM25Okapi(tokenized_corpus)
    tokenized_query = word_tokenize(query.lower())
    scores = bm25.get_scores(tokenized_query)
    # Fix: assign() returns a copy instead of writing a 'bm25_scores' column
    # into the caller's DataFrame (the original mutated the shared global df).
    ranked = df.assign(bm25_scores=scores)
    return ranked.nlargest(n, 'bm25_scores')
def search(query, df):
    """Hybrid retrieval: BM25 lexical pre-filter, then dense re-ranking.

    Args:
        query: Free-text search query.
        df: Hotel DataFrame (one row per review) to search over.

    Returns:
        A list of up to 5 dicts (one per distinct hotel), each with keys
        'hotel_name', 'image_url', 'score', 'description',
        'relevant_reviews' (at most 3 reviews per hotel).
    """
    n = 5
    query_embedding = model.encode(query)
    # Coarse lexical pre-filter: keep the top 15 rows by BM25.
    df = bm25_rank(query, df)
    # Work on a copy so adding 'similarity' doesn't trigger
    # SettingWithCopy warnings on the nlargest() slice.
    df = df.copy()
    # Dense re-rank; the reshape assumes 768-dim embeddings -- TODO confirm
    # this matches the embedding model's output dimension.
    df["similarity"] = df.embeddings.apply(lambda x: cosine_similarity(x, query_embedding.reshape(768, -1)))
    results = df.sort_values("similarity", ascending=False).head(n)
    resultlist = []
    hlist = []
    for r in results.index:
        if results.hotel_name[r] not in hlist:
            # All rows (reviews) in the result window for this hotel.
            smalldf = results.loc[results.hotel_name == results.hotel_name[r]]
            # Fix: cap to at most 3 review ROWS per hotel. The original
            # compared shape[1] (column count, a constant), so the cap
            # never applied.
            if smalldf.shape[0] > 3:
                smalldf = smalldf[:3]
            resultlist.append({
                "hotel_name": results.hotel_name[r],
                "image_url": smalldf.hotel_image[r],
                "score": smalldf.rate[r],
                "description": smalldf.hotel_description[r],
                "relevant_reviews": [smalldf.review_text[s] for s in smalldf.index]
            })
            hlist.append(results.hotel_name[r])
    return resultlist
def get_hotel_info(query):
    """Retrieve hotel results for `query`, optionally filtered by city.

    Runs spaCy NER on the query; if any entity is found, the first one is
    treated as a candidate city and matched against df['locality']. Falls
    back to the full corpus when no entity is found or the city has no
    hotels.

    Returns:
        A list of result dicts from search() with keys 'hotel_name',
        'image_url', 'score', 'description', 'relevant_reviews'.
    """
    doc = nlp(query)
    if doc.ents:
        # Fix: title() upper-cases every word ("new york" -> "New York");
        # the original capitalize() only handled the first word, so
        # multi-word cities could never match. Assumes 'locality' values
        # are title-cased -- TODO confirm against the data.
        city = doc.ents[0].text.lower().title()
        df_filtred = df[df['locality'] == city]
        if df_filtred.shape[0] == 0:
            # Unknown city: fall back to searching every hotel.
            df_filtred = df
    else:
        city = None
        df_filtred = df
    # search() already returns dicts with exactly the keys callers consume;
    # the original re-copied each dict field by field for no effect.
    return search(query, df_filtred)
def generate_answer(query, context):
    """Produce a travel-agent style answer for `query` grounded in `context`.

    Sends one chat-completion request (legacy openai<1.0 API) and returns
    the model's reply as stripped Markdown text.
    """
    prompt = f"""
Based on the following query from a user, please generate a detailed answer based on the context
focusing on which is the top hotel based on the query. You should respond as if you are a travel agent and are conversing with the
user in a nice cordial way. Remove any special characters and (\\n), make the output clean and concise.
###########
query:
"{query}"
########
context:
"{context}"
#####
Return in Markdown format with each hotel highlighted.
"""
    system_msg = {"role": "system", "content": "You are a helpful assistant."}
    user_msg = {"role": "user", "content": prompt}
    completion = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[system_msg, user_msg],
        temperature=0.2,  # low temperature keeps the answer grounded in the context
        max_tokens=1500,
        n=1,
        stop=None,
    )
    # Pull the assistant's text out of the first (only) choice.
    return completion.choices[0].message['content'].strip()
def chatbot_response(message, history):
    """Gradio callback: answer `message` and append the turn to `history`.

    Returns the updated history twice because both outputs are wired to
    the same Chatbot component.
    """
    hotel_infos = get_hotel_info(message)
    if not hotel_infos:
        reply = "No results found."
    else:
        # Flatten each hotel result into one line of LLM context.
        context_lines = [
            f"Hotel Name: {info['hotel_name']}, Score: {info['score']}, Description: {info['description']}, Reviews: {info['relevant_reviews']}"
            for info in hotel_infos
        ]
        reply = generate_answer(message, "\n".join(context_lines))
    history.append((message, reply))
    return history, history
# --- Gradio UI wiring (module tail; launch() starts the web server) ---
with gr.Blocks() as interface:
    chatbot = gr.Chatbot(label="Hotel Search Chatbot")
    query_input = gr.Textbox(label="Ask me about hotels!")
    submit_button = gr.Button("Send")
    # The chatbot component is passed in as the running history and also
    # receives the updated history back (listed twice in outputs).
    submit_button.click(
        fn=chatbot_response,
        inputs=[query_input, chatbot],
        outputs=[chatbot, chatbot]
    )
interface.launch()
|