File size: 7,987 Bytes
40fd038
34d69b0
65d40e5
40fd038
 
 
 
 
 
 
 
27e23d9
 
 
 
40fd038
 
b8baef5
40fd038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18869c5
40fd038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1271c0b
40fd038
ca8e725
40fd038
 
 
 
 
 
 
 
 
e64d5d0
 
40fd038
e64d5d0
40fd038
 
 
65d40e5
40fd038
 
 
 
 
 
ca8e725
40fd038
 
 
 
8a369cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40fd038
 
 
 
 
 
 
 
 
 
 
069c659
40fd038
 
 
 
 
 
 
 
e64d5d0
40fd038
 
 
9633001
40fd038
 
 
 
 
 
 
 
 
069c659
40fd038
 
069c659
40fd038
 
 
1271c0b
40fd038
ca8e725
40fd038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0d716d
069c659
a0d716d
40fd038
 
 
 
 
 
 
 
 
 
65d40e5
40fd038
65d40e5
8a369cb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import os
from groq import Groq
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import sqlite3
import pandas as pd
from tqdm import tqdm

# Groq client used for every chat-completion call in this module.
# The key is read from the GROQ_API_KEY environment variable (on Hugging Face
# this is stored as a secret); if the variable is unset, None is passed through.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))


# Open a shared-cache, in-memory SQLite database.
# uri=True is required for sqlite3 to interpret the "file:" string as a URI;
# without it the whole string is taken as a literal file name on disk.
con = sqlite3.connect("file::memory:?cache=shared", uri=True, check_same_thread=False)
con.row_factory = sqlite3.Row  # rows can be read by column name as well as by index
cur = con.cursor()

# Create the places table if it does not exist yet.
cur.execute("""
            CREATE TABLE IF NOT EXISTS places (
                Place_Id INTEGER PRIMARY KEY,  -- SQLite auto-increments INTEGER PRIMARY KEY automatically
                Place_Name TEXT NOT NULL,      -- SQLite uses TEXT instead of VARCHAR
                Description TEXT,
                Category TEXT,
                City TEXT,
                Price REAL,                    -- SQLite uses REAL instead of DECIMAL or FLOAT
                Rating REAL,
                Embedding TEXT
            );
            """)


# Source data for the places table.
data = pd.read_csv('tourism_place.csv')


# Seed the table only when it holds no rows yet (probing one row is enough).
cur.execute("SELECT 1 FROM places LIMIT 1")

if cur.fetchone() is None:
    # Walk the six columns in lockstep instead of indexing each Series by label per row.
    place_rows = zip(
        data['Place_Name'],
        data['Description'],
        data['Category'],
        data['City'],
        data['Price'],
        data['Rating'],
    )
    cur.executemany(
        """
        INSERT INTO places (Place_Name, Description, Category, City, Price, Rating)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        (
            (name, description, category, city, float(price), float(rating))
            for name, description, category, city, price, rating
            in tqdm(place_rows, total=len(data))
        ),
    )

    # Commit the inserted rows.
    con.commit()

# Load the Hugging Face sentence-embedding model once. It is shared by the
# start-up indexing below and by the query-embedding code later in the file.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')


def compute_and_store_embeddings():
    """Embed every row of ``places`` and store the vector in its Embedding column.

    For each place the name, category, description and city are joined with
    spaces into one text. All texts are encoded in a single batched call to
    the module-level ``model``, and each vector is saved as a comma-separated
    string of its components. Changes are committed before returning.
    Does nothing when the table is empty.
    """
    cur.execute("SELECT Place_Id, Place_Name, Category, Description, City FROM places")
    places = cur.fetchall()
    if not places:
        return

    # Column order follows the SELECT: [0]=Place_Id, [1]=Place_Name,
    # [2]=Category, [3]=Description, [4]=City.
    texts = [f"{place[1]} {place[2]} {place[3]} {place[4]}" for place in places]

    # One batched encode call instead of one call per row.
    embeddings = model.encode(texts)

    cur.executemany(
        "UPDATE places SET Embedding = ? WHERE Place_Id = ?",
        [
            (','.join(str(x) for x in embedding), place[0])
            for place, embedding in zip(places, embeddings)
        ],
    )

    # Commit the changes to the database
    con.commit()


# Populate the Embedding column at import time.
compute_and_store_embeddings()

# Rewrite the free-form user query into a structured form via the Groq chat model
def normalize_query(user_query):
    """Ask the chat model to extract place name, category, description and city.

    Only the last line of the model's reply is kept, with surrounding
    whitespace stripped. The result is printed and returned.

    Returns:
        The normalized query string, or "" if anything raises along the way
        (the error is printed).
    """
    try:
        prompt = f"""
                    Please analyze the query: \"{user_query}\", extract Place name, Category, Description, and City. 
                    Return the response as: "Place name, Category, Description, City".
                """
        completion = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            temperature=0.5,
            messages=[{"role": "user", "content": prompt}],
        )
        reply_text = completion.choices[0].message.content
        # The final line of the reply is taken as the answer.
        final_line = reply_text.split('\n')[-1]
        normalized_user_query = final_line.strip()
        print(f"Normalized Query: {normalized_user_query}")
    except Exception as e:
        print(f"Error normalizing query: {e}")
        return ""

    return normalized_user_query

# Generate user embedding using Hugging Face model
def get_user_embedding(query):
    """Encode ``query`` with the module-level sentence-embedding model.

    Returns:
        The vector produced by ``model.encode(query)``. If encoding raises,
        the error is printed and an all-zero vector of length 384 is returned
        so that callers still receive a usable array.
    """
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        # np.zeros() needs an explicit shape. 384 is the output size of the
        # 'paraphrase-MiniLM-L6-v2' model used in this file.
        return np.zeros(384)

# Find similar places based on cosine similarity
def get_similar_places(user_embedding):
    """Rank every stored place by cosine similarity to ``user_embedding``.

    Args:
        user_embedding: 1-D query vector; it must have the same length as the
            stored place embeddings.

    Returns:
        A list of ``(place_row, similarity)`` tuples sorted by similarity and
        then by rating, both descending. Rows without a stored embedding are
        skipped. The list is empty when no row has an embedding.
    """
    rows = cur.execute("SELECT * FROM places").fetchall()

    candidates = []
    vectors = []
    for place in rows:
        embedding_str = place['Embedding']
        if not embedding_str:
            # NULL or empty embedding: nothing to compare against.
            continue
        candidates.append(place)
        # Embeddings are stored as comma-separated floats.
        vectors.append(np.array([float(x) for x in embedding_str.split(',')]))

    if not candidates:
        return []

    # One similarity call over the stacked matrix instead of one call per row.
    scores = cosine_similarity([user_embedding], np.vstack(vectors))[0]
    similarities = list(zip(candidates, scores))

    # A NULL rating counts as 0.0 so that tie-breaking never compares None with a float.
    return sorted(
        similarities,
        key=lambda x: (x[1], x[0]['Rating'] or 0.0),
        reverse=True,
    )

# Build the candidate destination list for a user query
def get_top_5_destinations(user_query):
    """Return the best-matching places for ``user_query``.

    The query is normalized, embedded, and compared against all stored
    places. Despite the function name, the first 10 ranked places are
    returned.

    Returns:
        A list of dicts with keys name, city, category, rating, description
        and similarity, or the string "Tidak ada tempat yang ditemukan." when
        the similarity search yields nothing.
    """
    query_vector = get_user_embedding(normalize_query(user_query))
    ranked = get_similar_places(query_vector)

    if not ranked:
        return "Tidak ada tempat yang ditemukan."

    top_places = [
        {
            'name': place['Place_Name'],
            'city': place['City'],
            'category': place['Category'],
            'rating': place['Rating'],
            'description': place['Description'],
            'similarity': similarity,
        }
        for place, similarity in ranked[:10]
    ]
    print(top_places)
    return top_places

# Generate response to user using Groq VM
def generate_response(user_query, top_places):
    """Ask the chat model to present recommendations for ``user_query``.

    Args:
        user_query: The user's original request. It is included in the
            prompt so the model can choose the places that fit it.
        top_places: List of dicts with at least the keys name, city,
            category, rating and description.

    Returns:
        The model's reply text, or a fixed apology message in Bahasa
        Indonesia if anything raises (the error is printed).
    """
    import json  # local import: only this function needs it

    try:
        # Serialize with json.dumps so quotes, newlines and None values inside
        # the data are escaped correctly. ensure_ascii=False keeps non-ASCII
        # text readable for the model.
        destinations_data = json.dumps(
            [
                {
                    "name": place["name"],
                    "city": place["city"],
                    "category": place["category"],
                    "rating": place["rating"],
                    "description": place["description"],
                }
                for place in top_places
            ],
            ensure_ascii=False,
        )

        # System prompt: Simplified and focused on returning only the recommendations
        system_prompt = """
            You are a tour guide assistant. Your task is to present the following tourism recommendations based on what user want and needs to the user in Bahasa Indonesia.
            - For each destination, include the name, city, category, rating, and a short description.
            - Do not provide any additional commentary.
            - Only and must only return 5 places that suitable what user wants and provided the data in a clear and concise format.
            """

        # The user's request goes first so the model can actually select the
        # places that match it, as the system prompt asks.
        user_message = (
            f"Permintaan pengguna: {user_query}\n"
            f"Berikut adalah rekomendasi berdasarkan data: {destinations_data}"
        )

        # Generate the response using the model
        response = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            temperature=0.2,
            messages=[
                {"role": "system", "content": system_prompt},  # System prompt defines behavior
                {"role": "user", "content": user_message},
            ]
        )

        # Return the response content generated by the model
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."

# Gradio callback: user text in, recommendation text out
def chatbot(user_query):
    """Answer one user query with recommendation text.

    Fetches the candidate places for the query. If that step returns a plain
    string (its "no places found" message), the string is returned unchanged.
    Otherwise the full candidate list is passed to the chat model and its
    reply is returned.
    """
    candidates = get_top_5_destinations(user_query)

    # A list means places were found; a string is a message to show as-is.
    if not isinstance(candidates, str):
        return generate_response(user_query, candidates)

    return candidates

# Gradio UI: a single text box in, a single text box out, backed by chatbot().
interface_options = dict(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="Tourism Recommendation Chatbot",
    description="Masukkan pertanyaan wisata Anda dan dapatkan rekomendasi tempat terbaik!",
)
iface = gr.Interface(**interface_options)

# Start the app only when this file is run as a script; share=True is passed to launch().
if __name__ == "__main__":
    iface.launch(share=True)