# Spaces: Sleeping
import json
import os
import sqlite3

import gradio as gr
import numpy as np
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
# Groq client. The API key is read from the GROQ_API_KEY environment variable
# (on Hugging Face this is stored as a secret).
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

# One SQLite connection shared by the whole module. check_same_thread=False
# allows threads other than the creating one to use it.
# NOTE(review): sqlite3.connect() treats a "file:...?cache=shared" string as a
# URI only when uri=True is passed. As written, the string is presumably used
# as a literal on-disk file name rather than a shared in-memory database
# (unless the linked SQLite build enables URI names by default) - confirm
# which behaviour is intended before changing it.
con = sqlite3.connect("file::memory:?cache=shared", check_same_thread=False)
con.row_factory = sqlite3.Row  # rows can be read by column name
cur = con.cursor()

# Create the table if it does not exist yet.
cur.execute("""
    CREATE TABLE IF NOT EXISTS places (
        Place_Id INTEGER PRIMARY KEY, -- SQLite auto-increments INTEGER PRIMARY KEY automatically
        Place_Name TEXT NOT NULL, -- SQLite uses TEXT instead of VARCHAR
        Description TEXT,
        Category TEXT,
        City TEXT,
        Price REAL, -- SQLite uses REAL instead of DECIMAL or FLOAT
        Rating REAL,
        Embedding TEXT
    );
""")

data = pd.read_csv('tourism_place.csv')

# Seed the table from the CSV only when it holds no rows. Probing with
# "SELECT 1 ... LIMIT 1" avoids reading full rows just to test for emptiness.
cur.execute("SELECT 1 FROM places LIMIT 1")
if cur.fetchone() is None:
    # Walk the columns in lockstep instead of indexing the frame row by row.
    # Price and Rating are cast to float exactly as before.
    place_rows = (
        (name, description, category, city, float(price), float(rating))
        for name, description, category, city, price, rating in zip(
            data['Place_Name'],
            data['Description'],
            data['Category'],
            data['City'],
            data['Price'],
            data['Rating'],
        )
    )
    # tqdm wraps the row iterator so the progress bar is still shown.
    cur.executemany(
        """
        INSERT INTO places (Place_Name, Description, Category, City, Price, Rating)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        tqdm(place_rows, total=len(data)),
    )
    # Commit the inserted rows to the database.
    con.commit()
# Compute and store embeddings
def compute_and_store_embeddings():
    """Embed every row of ``places`` and store the vector in ``Embedding``.

    For each place, the name, category, description and city are joined with
    spaces into one text. All texts are encoded in a single batched
    ``model.encode`` call rather than one call per row. Each vector is saved
    as a comma-separated string of its components, and everything is
    committed once at the end.

    Uses the module-level ``cur`` and ``con``. Loads its own
    SentenceTransformer instance because it is called before the module-level
    query model is created.
    """
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

    # Select all places from the database.
    cur.execute("SELECT Place_Id, Place_Name, Category, Description, City FROM places")
    places = cur.fetchall()
    if not places:
        # Nothing to embed; avoid encoding an empty batch.
        return

    place_ids = []
    texts = []
    for place_id, name, category, description, city in places:
        place_ids.append(place_id)
        # Combine Place_Name, Category, Description and City into one string.
        texts.append(f"{name} {category} {description} {city}")

    # One batched call; the result holds one vector per input text.
    embeddings = model.encode(texts)

    # Serialise each vector as comma-separated text and write all rows at once.
    cur.executemany(
        "UPDATE places SET Embedding = ? WHERE Place_Id = ?",
        [
            (','.join(str(x) for x in embedding), place_id)
            for place_id, embedding in zip(place_ids, embeddings)
        ],
    )
    # Commit the changes to the database.
    con.commit()
# Fill the Embedding column for every stored place at import time.
compute_and_store_embeddings()

# Sentence-embedding model used to encode incoming user queries.
_QUERY_MODEL_NAME = 'paraphrase-MiniLM-L6-v2'
model = SentenceTransformer(_QUERY_MODEL_NAME)
# Normalize user query using Groq VM
def normalize_query(user_query):
    """Ask the Groq chat model to rewrite a free-text query into search terms.

    The model is prompted to answer as "Place name, Category, Description,
    City". The last non-empty line of its reply is used as the normalized
    query.

    Args:
        user_query: The raw text typed by the user.

    Returns:
        The normalized query string. If the request fails or the reply has no
        usable text, ``user_query`` is returned unchanged so that the
        similarity search still runs on meaningful input instead of an empty
        string.
    """
    prompt = f"""
    Please analyze the query: "{user_query}", extract Place name, Category, Description, and City.
    Return the response as: "Place name, Category, Description, City".
    """
    try:
        # NOTE(review): confirm this model id is still served by Groq.
        response = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            temperature=0.5,
            messages=[{"role": "user", "content": prompt}],
        )
        content = response.choices[0].message.content or ""
        # Taking the last *non-empty* line keeps a trailing newline in the
        # reply from turning the result into an empty string.
        non_empty_lines = [line.strip() for line in content.splitlines() if line.strip()]
        normalized_user_query = non_empty_lines[-1] if non_empty_lines else ""
    except Exception as e:
        # Boundary around the remote call: report and degrade gracefully.
        print(f"Error normalizing query: {e}")
        normalized_user_query = ""

    if not normalized_user_query:
        return user_query

    print(f"Normalized Query: {normalized_user_query}")
    return normalized_user_query
# Generate user embedding using Hugging Face model
def get_user_embedding(query, embedding_dim=384):
    """Encode ``query`` with the module-level sentence-embedding model.

    Args:
        query: Text to embed.
        embedding_dim: Length of the zero vector returned when encoding
            fails. The default of 384 is the output size listed for
            'paraphrase-MiniLM-L6-v2'; adjust it if the model changes.

    Returns:
        The embedding produced by ``model.encode(query)``, or a 1-D zero
        array of length ``embedding_dim`` if encoding raises.
    """
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        # np.zeros() requires a shape; calling it without one raised a
        # TypeError from inside this handler and hid the original error.
        return np.zeros(embedding_dim)
# Find similar places based on cosine similarity
def get_similar_places(user_embedding):
    """Rank every stored place by cosine similarity to ``user_embedding``.

    Args:
        user_embedding: 1-D query vector with the same length as the stored
            place embeddings.

    Returns:
        A list of ``(place_row, similarity)`` tuples sorted by similarity and
        then by rating, both descending. Rows without a stored embedding are
        skipped. An empty list is returned when no row can be scored.
    """
    # con.execute() opens a fresh cursor, so request-handling threads do not
    # interleave on the shared module-level cursor.
    rows = con.execute("SELECT * FROM places").fetchall()

    # Embeddings are stored as comma-separated strings; ignore rows where the
    # column is NULL or empty instead of failing on .split().
    candidates = [row for row in rows if row['Embedding']]
    if not candidates:
        return []

    # Convert all stored strings back to one 2-D float array and score every
    # place in a single cosine_similarity call.
    embedding_matrix = np.array(
        [row['Embedding'].split(',') for row in candidates],
        dtype=float,
    )
    scores = cosine_similarity([user_embedding], embedding_matrix)[0]

    def sort_key(scored):
        place, similarity = scored
        rating = place['Rating']
        # A NULL rating sorts last among equal similarities rather than
        # raising when None is compared with a float.
        return (similarity, rating if rating is not None else float('-inf'))

    # Sort results based on similarity and then by rating.
    return sorted(zip(candidates, scores), key=sort_key, reverse=True)
# Main function to get top 5 destinations
def get_top_5_destinations(user_query, limit=10):
    """Return the best-matching places for ``user_query`` as plain dicts.

    The query is normalized, embedded, and compared against every stored
    place. Despite the function name, the first ``limit`` ranked places
    (10 by default) are returned, not 5.

    Args:
        user_query: The raw text typed by the user.
        limit: Maximum number of ranked places to return.

    Returns:
        A list of dicts with keys ``name``, ``city``, ``category``,
        ``rating``, ``description`` and ``similarity``. If no place is
        found, the string "Tidak ada tempat yang ditemukan." is returned.
    """
    normalized_query = normalize_query(user_query)
    user_embedding = get_user_embedding(normalized_query)
    similar_places = get_similar_places(user_embedding)

    if not similar_places:
        return "Tidak ada tempat yang ditemukan."

    top_places = [
        {
            'name': place['Place_Name'],
            'city': place['City'],
            'category': place['Category'],
            'rating': place['Rating'],
            'description': place['Description'],
            'similarity': similarity,
        }
        for place, similarity in similar_places[:limit]
    ]
    print(top_places)
    return top_places
# Generate response to user using Groq VM
def generate_response(user_query, top_places):
    """Ask the Groq chat model to present the candidate places to the user.

    Args:
        user_query: The user's original request. It is sent with the
            candidates so the model can pick the places that fit it, as the
            system prompt asks.
        top_places: List of dicts with at least the keys ``name``, ``city``,
            ``category``, ``rating`` and ``description``.

    Returns:
        The model's reply text. If anything fails, the fixed apology string
        "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi." is returned.
    """
    try:
        # Serialise with json.dumps so quotes, newlines and missing values in
        # the data cannot produce malformed JSON.
        destinations_data = json.dumps(
            [
                {
                    "name": place["name"],
                    "city": place["city"],
                    "category": place["category"],
                    "rating": place["rating"],
                    "description": place["description"],
                }
                for place in top_places
            ],
            ensure_ascii=False,
        )

        # System prompt: Simplified and focused on returning only the recommendations
        system_prompt = """
        You are a tour guide assistant. Your task is to present the following tourism recommendations based on what user want and needs to the user in Bahasa Indonesia.
        - For each destination, include the name, city, category, rating, and a short description.
        - Do not provide any additional commentary.
        - Only and must only return 5 places that suitable what user wants and provided the data in a clear and concise format.
        """

        # The user's request is part of the message; otherwise the model has
        # no basis for deciding which places suit the user.
        user_message = (
            f"Pertanyaan pengguna: {user_query}\n"
            f"Berikut adalah rekomendasi berdasarkan data: {destinations_data}"
        )

        # Generate the response using the model.
        # NOTE(review): confirm this model id is still served by Groq.
        response = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            temperature=0.2,
            messages=[
                {"role": "system", "content": system_prompt},  # System prompt defines behavior
                {"role": "user", "content": user_message},
            ],
        )

        # Return the response content generated by the model.
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."
# Gradio Interface - User Input and Output
def chatbot(user_query):
    """Answer one user query: retrieve candidate places, then phrase the reply.

    Returns the text produced by ``generate_response``, or the message string
    from ``get_top_5_destinations`` when it returns one instead of a list.
    """
    candidates = get_top_5_destinations(user_query)

    # A list of places goes on to the language model for presentation.
    if not isinstance(candidates, str):
        return generate_response(user_query, candidates)

    # A plain string is the "no places found" message; pass it through as is.
    return candidates
# Text-in / text-out Gradio UI wired to the chatbot function.
_INTERFACE_OPTIONS = {
    "fn": chatbot,
    "inputs": "text",
    "outputs": "text",
    "title": "Tourism Recommendation Chatbot",
    "description": "Masukkan pertanyaan wisata Anda dan dapatkan rekomendasi tempat terbaik!",
}
iface = gr.Interface(**_INTERFACE_OPTIONS)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=True)