File size: 7,635 Bytes
40fd038
34d69b0
65d40e5
40fd038
 
 
 
 
 
 
 
27e23d9
 
 
 
40fd038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1271c0b
40fd038
 
 
 
 
 
 
 
 
 
 
 
 
 
65d40e5
40fd038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1271c0b
40fd038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65d40e5
40fd038
65d40e5
1271c0b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import json
import os
import sqlite3

import gradio as gr
import numpy as np
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

# Get the Groq API key from environment variables (in Hugging Face, this is stored as a secret)
# NOTE(review): os.environ.get returns None when GROQ_API_KEY is unset; the
# client is still constructed, and every later API call would then fail at
# runtime with an auth error — verify the secret is configured in deployment.
client = Groq(
    # This is the default and can be omitted
    api_key=os.environ.get("GROQ_API_KEY"),
)


# Shared in-memory SQLite database for this process.
# BUG FIX: without uri=True, sqlite3 treats "file::memory:?cache=shared" as a
# literal on-disk filename (a file with that exact name is created) rather
# than parsing it as a URI for a shared-cache in-memory database.
con = sqlite3.connect("file::memory:?cache=shared", uri=True)
con.row_factory = sqlite3.Row  # rows support access by column name
cur = con.cursor()

# create table if not exists

cur.execute("""
            CREATE TABLE IF NOT EXISTS places (
                Place_Id INTEGER PRIMARY KEY,  -- SQLite auto-increments INTEGER PRIMARY KEY automatically
                Place_Name TEXT NOT NULL,      -- SQLite uses TEXT instead of VARCHAR
                Description TEXT,
                Category TEXT,
                City TEXT,
                Price REAL,                    -- SQLite uses REAL instead of DECIMAL or FLOAT
                Rating REAL,
                Embedding TEXT                 -- comma-separated floats, filled in later
            );
            """)


# Load the tourism dataset; assumes tourism_place.csv sits next to this script
# with columns Place_Name, Description, Category, City, Price, Rating — TODO confirm.
data = pd.read_csv('tourism_place.csv')


# check if the table is empty
cur.execute("SELECT * FROM places")

# Seed the table from the CSV only on first run: any existing row skips the import.
if cur.fetchone() is None:
    # Store the places in the database
    for i in tqdm(range(len(data))):
        # Place_Id is omitted so SQLite assigns it via INTEGER PRIMARY KEY auto-increment.
        cur.execute("""
            INSERT INTO places (Place_Name, Description, Category, City, Price, Rating) 
            VALUES (?, ?, ?, ?, ?, ?)
            """, (data['Place_Name'][i], data['Description'][i], data['Category'][i], data['City'][i], float(data['Price'][i]), float(data['Rating'][i]))
    )

    # Commit the changes to the database
    con.commit()

# Compute and store embeddings
def compute_and_store_embeddings():
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')  
    
    # Select all places from the database
    cur.execute("SELECT Place_Id, Place_Name, Category, Description, City FROM places")
    places = cur.fetchall()
    
    for place in places:
        # Combine PlaceName, Category, Description, and City into one string
        text = f"{place[1]} {place[2]} {place[3]} {place[4]}"
        
        # Generate embedding for the combined text
        embedding = model.encode(text)
        
        # Convert embedding to a string format to store in the database
        embedding_str = ','.join([str(x) for x in embedding])
        
        # Update the place in the database with the embedding
        cur.execute(
            "UPDATE places SET Embedding = ? WHERE Place_Id = ?", 
            (embedding_str, place[0])
        )
    
    # Commit the changes to the database
    con.commit()
# Run the function to compute and store embeddings
compute_and_store_embeddings()


# Load Hugging Face model for generating embeddings
# NOTE(review): this loads paraphrase-MiniLM-L6-v2 a second time —
# compute_and_store_embeddings() loads its own copy; the two could be shared.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Normalize user query using Groq VM
def normalize_query(user_query):
    try:
        response = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            messages=[{
                "role": "user",
                "content": f"""
                    Please analyze the query: \"{user_query}\", extract Place name, Category, Description, and City. 
                    Return the response as: "Place name, Category, Description, City".
                """
            }]
        )
        normalized_user_query = response.choices[0].message.content.split('\n')[-1].strip()
        return normalized_user_query
    except Exception as e:
        print(f"Error normalizing query: {e}")
        return ""

# Generate user embedding using Hugging Face model
def get_user_embedding(query):
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        return np.zeros(512)  # Assuming 384 as default embedding size

# Find similar places based on cosine similarity
def get_similar_places(user_embedding):
    similarities = []
    try:
        res = cur.execute("SELECT * FROM places").fetchall()

        for place in res:
            embedding_str = place['Embedding']
            embedding = np.array([float(x) for x in embedding_str.split(',')])
            similarity = cosine_similarity([user_embedding], [embedding])[0][0]
            similarities.append((place, similarity))

        ranked_results = sorted(similarities, key=lambda x: (x[1], x[0]['Rating']), reverse=True)
        return ranked_results[:5]  # Return top 5 places
    except Exception as e:
        print(f"Error fetching similar places: {e}")
        return []

# Main function to get top 5 destinations
def get_top_5_destinations(user_query):
    normalized_query = normalize_query(user_query)
    user_embedding = get_user_embedding(normalized_query)
    similar_places = get_similar_places(user_embedding)

    if not similar_places:
        return "Tidak ada tempat yang ditemukan."

    top_places = []
    for i, (place, similarity) in enumerate(similar_places):
        top_places.append({
            'name': place['Place_Name'],
            'city': place['City'],
            'category': place['Category'],
            'rating': place['Rating'],
            'description': place['Description'],
            'similarity': similarity
        })
    
    return top_places

# Generate response to user using Groq VM
def generate_response(user_query, top_places):
    try:
        # Prepare the destinations data in JSON format for the model to use directly
        destinations_data = ", ".join([
            f'{{"name": "{place["name"]}", "city": "{place["city"]}", "category": "{place["category"]}", "rating": {place["rating"]}, "description": "{place["description"]}"}}'
            for place in top_places
        ])

        # System prompt: Simplified and focused on returning only the recommendations
        system_prompt = """
            You are a tour guide assistant. Your task is to present the following tourism recommendations to the user in Bahasa Indonesia.
            - For each destination, include the name, city, category, rating, and a short description.
            - Do not provide any additional commentary.
            - Only return the provided data in a clear and concise format.
            """

        # Generate the response using the model
        response = client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            messages=[
                {"role": "system", "content": system_prompt},  # System prompt defines behavior
                {"role": "user", "content": f"Berikut adalah rekomendasi berdasarkan data: {destinations_data}"}
            ]
        )

        # Return the response content generated by the model
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."

# Gradio Interface - User Input and Output
def chatbot(user_query):
    # Step 1: Get the top 5 destinations
    top_places = get_top_5_destinations(user_query)

    if isinstance(top_places, str):  # Error case, e.g. "No places found"
        return top_places

    # Step 2: Generate the chatbot's response
    response = generate_response(user_query, top_places)
    return response

# Define Gradio Interface
iface = gr.Interface(
    fn=chatbot,
    inputs="text",  
    outputs="text",  
    title="Tourism Recommendation Chatbot",
    description="Masukkan pertanyaan wisata Anda dan dapatkan rekomendasi tempat terbaik!"
)

# Launch the Gradio App
if __name__ == "__main__":
    iface.launch(sahre=True)