import os
import hashlib

import pandas as pd
from openai import OpenAI
import gradio as gr
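
# Gradio demo: select a user, have gpt-3.5-turbo summarize their reading themes and
# preferences, then ask it to pick the best next book from the user's candidate list.
# Expected JSONL columns per row: user_id, purchased_books, candidate_options, target_asin.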

input_file = "./data/sample_gpg_data.jsonl"
user_df = pd.read_json(input_file, lines=True)

user_ids = user_df["user_id"].unique().tolist()
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))


# Simple in-memory cache
guidance_cache = {}
profile_cache = {}

def hash_titles(titles):
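    # Order-insensitive MD5 over the user's titles; used as the key for both in-memory caches.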
    joined = "\n".join(sorted(titles))
    return hashlib.md5(joined.encode("utf-8")).hexdigest()

def get_books(user_id):
    # All eight outputs must always be returned so the tuple matches the Gradio
    # outputs list, including on the early-exit paths.
    empty = (pd.DataFrame(), "", "", "", pd.DataFrame(), "", "")
    if user_id is None:
        return ("Please select a user.",) + empty

    user_info = user_df.loc[user_df["user_id"] == user_id]
    books_list = user_info["purchased_books"].values
    if len(books_list) == 0:
        return (f"No books found for {user_id}.",) + empty

    books = books_list[0]
    df = pd.DataFrame(books)
    df = df[["title", "author", "categories"]].rename(
        columns={"title": "Title", "author": "Author", "categories": "Category"}
    )
    books_info = generate_books(books)
    titles = [book["title"] for book in books if "title" in book]

    cache_key = hash_titles(titles)

    if cache_key in guidance_cache:
        guidance_response = guidance_cache[cache_key]
        profile_response = profile_cache[cache_key]
        print("✅ Using cached response")
    else:
        print("🧠 Calling OpenAI API")
        guidance_response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": guidance_prompt(books_info)}],
            temperature=0.3,
            max_tokens=150,
        ).choices[0].message.content.strip()
        guidance_cache[cache_key] = guidance_response

        profile_response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": profile_prompt(books_info, guidance_response)}],
            temperature=0.3,
            max_tokens=150,
        ).choices[0].message.content.strip()
        profile_cache[cache_key] = profile_response

    # The recommendation step runs on both cache paths so every output is populated.
    candidates_options = user_info["candidate_options"].values[0] if "candidate_options" in user_info else []
    rec_prompt = build_recommendation_prompt(profile_response, candidates_options)
    # Ask the model to pick one candidate, then parse the numeric answer it returns.
    rec_response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": rec_prompt}],
        temperature=0.0,
        max_tokens=10,
    ).choices[0].message.content.strip()
    choice = extract_choice(rec_response)
    predicted_book = (
        candidates_options[choice - 1]
        if choice and 1 <= choice <= len(candidates_options)
        else None
    )
    target_book = user_info["target_asin"].values[0] if "target_asin" in user_info else ""
    print("target_book:", target_book)

    predicted_asin = predicted_book.get("asin", "") if predicted_book else ""
    return (
        f"{user_id}",
        df,
        guidance_response,
        profile_response,
        rec_prompt,
        pd.DataFrame(candidates_options),
        target_book,
        predicted_asin,
    )

def extract_choice(response_text):
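    # Return the first integer token (bare or bracketed, e.g. "[2]") found in the model's reply.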
    for token in response_text.split():
        if token.strip("[]").isdigit():
            return int(token.strip("[]"))
    return None

def generate_books(books):
    # Turn each purchased book (a dict with title/categories/description) into one line of prompt context.
    book_combos = []
    for book in books:
        categories = ", ".join(book.get("categories", []))
        book_combos.append(
            f"Title of the book is {book.get('title', 'Unknown')} and the category of the book is {categories}. "
            f"Description of the book is {book.get('description', '')}"
        )
    return book_combos

def guidance_prompt(titles):
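    # First LLM step: ask which genres/themes recur across the user's books.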
    return f"""Here is a list of books a person has read:\n{chr(10).join("- " + t for t in titles)}\n\nWhat genres or themes do you notice across these books? Please list them concisely."""

def profile_prompt(titles, guidance):
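    # Second LLM step: condense the books plus the extracted themes into a one-paragraph preference profile.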
    return f"""Here is a list of books a person has read:\n{chr(10).join("- " + t for t in titles)}\n\nBased on the following genres/themes: {guidance}\n\nSummarize this person's book preferences in one paragraph."""

def build_recommendation_prompt(profile, candidates):
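    # Third step: list the numbered candidates under the profile and ask for a single number back.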
    prompt = f"""A user has the following reading preference:\n"{profile}"\n\nHere are some books they might consider next:\n"""
    if len(candidates) == 1 and isinstance(candidates[0], list):
        candidates = candidates[0]
    for i, book in enumerate(candidates, start=1):
        prompt += f"[{i}] {book.get('title', 'Unknown Title')}\n"
    prompt += "\nWhich of these books best matches the user's preference? Respond ONLY with the number [1-4]."
    return prompt


def get_books_theme(books):
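    # Placeholder for a future theme summary; not wired into the UI yet.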
    return

with gr.Blocks() as demo:
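    # One dropdown drives everything: changing the user re-runs get_books and refreshes all panels below.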
    gr.Markdown("## Select User")
    user_dropdown = gr.Dropdown(choices=user_ids, value=None, label="User ID")

    gr.Markdown("## Selected User")
    output_text = gr.Textbox(show_label=False)
    gr.Markdown("## Books read")
    output_table = gr.Dataframe(label="Books Read", interactive=False, show_label=False)
    gr.Markdown("## User Books Theme")
    output_theme = gr.Textbox(label="User Books Theme", lines=8, show_label=False)
    gr.Markdown("## User Profile")
    output_profile = gr.Textbox(label="User Profile", show_label=False, lines=6)
    output_rec_prompt = gr.Textbox(label="Recommendation Prompt", lines=8)
    output_candidate_options = gr.DataFrame(label="Candidate Books")
    output_target_id = gr.Textbox(label="Target Book")
    output_predicted_book = gr.Textbox(label="Predicted Book")
    
    user_dropdown.change(
        fn=get_books,
        inputs=user_dropdown,
        outputs=[
            output_text,
            output_table,
            output_theme,
            output_profile,
            output_rec_prompt,
            output_candidate_options,
            output_target_id,
            output_predicted_book,
        ],
    )


if __name__ == "__main__":
    demo.launch()