import os
import json
import sqlite3

import gradio as gr
import numpy as np
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
# Get the Groq API key from the environment (on Hugging Face Spaces it is stored as a secret)
client = Groq(
    # Reading GROQ_API_KEY from the environment is the client's default, so this could be omitted
    api_key=os.environ.get("GROQ_API_KEY"),
)
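# On a local machine the secret must be exported before launching; the
# variable name matches the lookup above, the value is your own key:
#   export GROQ_API_KEY="<your-key>"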
# A shared in-memory database; uri=True is required so SQLite parses the
# file: URI (without it, the string would be treated as a literal file name)
con = sqlite3.connect("file::memory:?cache=shared", uri=True, check_same_thread=False)
con.row_factory = sqlite3.Row
cur = con.cursor()
# Create the places table if it does not exist
cur.execute("""
    CREATE TABLE IF NOT EXISTS places (
        Place_Id INTEGER PRIMARY KEY,  -- SQLite auto-increments INTEGER PRIMARY KEY
        Place_Name TEXT NOT NULL,      -- SQLite uses TEXT instead of VARCHAR
        Description TEXT,
        Category TEXT,
        City TEXT,
        Price REAL,                    -- SQLite uses REAL instead of DECIMAL or FLOAT
        Rating REAL,
        Embedding TEXT
    );
""")
data = pd.read_csv('tourism_place.csv')
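# The loop below assumes tourism_place.csv provides at least these columns
# (this header line is illustrative, not copied from the real file):
#   Place_Name,Description,Category,City,Price,Rating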
# Populate the table only if it is empty
cur.execute("SELECT * FROM places")
if cur.fetchone() is None:
    # Store each place in the database
    for i in tqdm(range(len(data))):
        cur.execute("""
            INSERT INTO places (Place_Name, Description, Category, City, Price, Rating)
            VALUES (?, ?, ?, ?, ?, ?)
        """, (data['Place_Name'][i], data['Description'][i], data['Category'][i],
              data['City'][i], float(data['Price'][i]), float(data['Rating'][i])))
    # Commit the inserts to the database
    con.commit()
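# An equivalent bulk-insert sketch using executemany, usually faster than
# row-by-row execute (a sketch under the same column assumptions as above):
#   rows = list(zip(data['Place_Name'], data['Description'], data['Category'],
#                   data['City'], data['Price'].astype(float), data['Rating'].astype(float)))
#   cur.executemany(
#       "INSERT INTO places (Place_Name, Description, Category, City, Price, Rating) VALUES (?, ?, ?, ?, ?, ?)",
#       rows,
#   )
#   con.commit()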
# Compute and store embeddings
def compute_and_store_embeddings():
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    # Select all places from the database
    cur.execute("SELECT Place_Id, Place_Name, Category, Description, City FROM places")
    places = cur.fetchall()
    for place in places:
        # Combine Place_Name, Category, Description, and City into one string
        text = f"{place[1]} {place[2]} {place[3]} {place[4]}"
        # Generate an embedding for the combined text
        embedding = model.encode(text)
        # Serialize the embedding as a comma-separated string for the TEXT column
        embedding_str = ','.join(str(x) for x in embedding)
        # Store the embedding on the corresponding row
        cur.execute(
            "UPDATE places SET Embedding = ? WHERE Place_Id = ?",
            (embedding_str, place[0]),
        )
    # Commit the changes to the database
    con.commit()

# Run the function to compute and store embeddings
compute_and_store_embeddings()
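# Illustrative round trip of the storage format: paraphrase-MiniLM-L6-v2
# produces 384-dimensional vectors, serialized above as "0.12,-0.03,..."
# and recovered later with:
#   vec = np.array([float(x) for x in embedding_str.split(',')])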
# Load Hugging Face model for generating embeddings
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
# Normalize the user query using the Groq API
def normalize_query(user_query):
    try:
        response = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            temperature=0.5,
            messages=[{
                "role": "user",
                "content": f"""
                Please analyze the query: "{user_query}", and extract the place name, category, description, and city.
                Return the response as: "Place name, Category, Description, City".
                """
            }]
        )
        # Keep only the last line of the reply, which holds the extracted fields
        normalized_user_query = response.choices[0].message.content.split('\n')[-1].strip()
        print(f"Normalized Query: {normalized_user_query}")
        return normalized_user_query
    except Exception as e:
        print(f"Error normalizing query: {e}")
        return ""
# Generate the user-query embedding using the Hugging Face model
def get_user_embedding(query):
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        # Fall back to a zero vector of the model's embedding dimension;
        # np.zeros() with no arguments would raise a TypeError
        return np.zeros(model.get_sentence_embedding_dimension())
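# Usage sketch: the returned vector has the same dimensionality as the stored
# place embeddings, so the two can be compared directly:
#   q = get_user_embedding("museum sejarah di Jakarta")  # hypothetical query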
# Find similar places based on cosine similarity
def get_similar_places(user_embedding):
    similarities = []
    # Select all places from the database
    res = cur.execute("SELECT * FROM places").fetchall()
    for place in res:
        # Embeddings are stored as comma-separated strings; convert back to a numpy array
        embedding_str = place['Embedding']
        embedding = np.array([float(x) for x in embedding_str.split(',')])
        # Compute cosine similarity between the query and this place
        similarity = cosine_similarity([user_embedding], [embedding])[0][0]
        similarities.append((place, similarity))
    # Sort by similarity, breaking ties by rating
    ranked_results = sorted(similarities, key=lambda x: (x[1], x[0]['Rating']), reverse=True)
    # Return all places, best match first
    return ranked_results
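# Note: cosine_similarity expects 2-D inputs, hence the single-element lists
# above. A vectorized sketch that scores every place in one call (same
# ranking, assuming all stored embeddings share one dimension):
#   matrix = np.array([[float(x) for x in p['Embedding'].split(',')] for p in res])
#   sims = cosine_similarity([user_embedding], matrix)[0]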
# Main function: collect the top candidate destinations
def get_top_5_destinations(user_query):
    normalized_query = normalize_query(user_query)
    user_embedding = get_user_embedding(normalized_query)
    similar_places = get_similar_places(user_embedding)
    if not similar_places:
        return "Tidak ada tempat yang ditemukan."  # "No places were found."
    # Pass the top 10 candidates to the LLM, which picks the final 5
    top_places = []
    for place, similarity in similar_places[:10]:
        top_places.append({
            'name': place['Place_Name'],
            'city': place['City'],
            'category': place['Category'],
            'rating': place['Rating'],
            'description': place['Description'],
            'similarity': similarity,
        })
    print(top_places)
    return top_places
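# Shape of the successful return value (the values here are made up):
#   [{'name': 'Pantai X', 'city': 'Bali', 'category': 'Bahari',
#     'rating': 4.5, 'description': '...', 'similarity': 0.83}, ...]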
# Generate the response to the user via the Groq API
def generate_response(user_query, top_places):
    try:
        # Serialize the destination data as JSON for the model to use directly;
        # json.dumps handles quotes and special characters in descriptions safely
        destinations_data = json.dumps(
            [{k: v for k, v in place.items() if k != 'similarity'} for place in top_places],
            ensure_ascii=False,
        )
        # System prompt: focused on returning only the recommendations
        system_prompt = """
        You are a tour guide assistant. Your task is to present the following tourism recommendations, based on what the user wants and needs, in Bahasa Indonesia.
        - For each destination, include the name, city, category, rating, and a short description.
        - Do not provide any additional commentary.
        - Return exactly 5 places that suit what the user wants, presented in a clear and concise format.
        """
        # Generate the response using the model
        response = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            temperature=0.2,
            messages=[
                {"role": "system", "content": system_prompt},  # The system prompt defines the assistant's behavior
                {"role": "user", "content": f"Berikut adalah rekomendasi berdasarkan data: {destinations_data}"}
            ]
        )
        # Return the content generated by the model
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        # "Sorry, an error occurred while generating recommendations."
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."
# Gradio interface: user input and output
def chatbot(user_query):
    # Step 1: get the candidate destinations
    top_places = get_top_5_destinations(user_query)
    if isinstance(top_places, str):  # Error case, e.g. "no places found"
        return top_places
    # Step 2: have the LLM present the best 5 of the candidates
    response = generate_response(user_query, top_places)
    return response
# Define the Gradio interface
iface = gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="Tourism Recommendation Chatbot",
    # "Enter your travel question and get recommendations for the best places!"
    description="Masukkan pertanyaan wisata Anda dan dapatkan rekomendasi tempat terbaik!"
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch(share=True)
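# To run locally (assuming this script is saved as app.py with
# tourism_place.csv beside it): set GROQ_API_KEY, run `python app.py`, and
# open the printed local URL; share=True also creates a temporary public link.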