File size: 3,806 Bytes
37f6f60
92c0891
89ba56c
ba0a0d3
d6ff5a1
ba0a0d3
d6ff5a1
37f6f60
9062ce3
 
 
d6ff5a1
37f6f60
ba0a0d3
6795e5d
 
 
bafb8fc
0720e54
 
 
 
6795e5d
 
 
 
37f6f60
 
bafb8fc
d6ff5a1
37f6f60
69b5f29
ba0a0d3
 
3abee27
6795e5d
 
 
 
 
9062ce3
6795e5d
 
 
ba0a0d3
6795e5d
1656c60
b1228d0
9062ce3
 
ba0a0d3
d6ff5a1
9062ce3
 
 
 
6795e5d
ebf589b
e77e4c7
9062ce3
6795e5d
 
38d1ee1
 
6795e5d
b2ed780
 
 
 
 
acffe44
b2ed780
 
 
 
6795e5d
acffe44
b2ed780
 
 
6795e5d
 
 
 
483f6d3
6795e5d
 
 
 
 
 
 
9eb52f8
6795e5d
3abee27
bd396f2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from z_utils import get_dataframe
import gradio as gr
from z_hypothetical_summary import generate_summaries
from z_similarity import computes_similarity_w_hypothetical
from transformers import pipeline, set_seed
from sentence_transformers import SentenceTransformer


# Configuration
# Fix the seed used by transformers so repeated runs behave the same way.
set_seed(42)

# Model identifiers.
# NOTE: "CASUAL" is a misspelling of "causal"; the name is kept as-is because
# the model-loading code further down refers to it.
TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"  # text-generation model
EMB_MODEL = "all-MiniLM-L6-v2"  # sentence-transformers embedding model

# Data source and result size.
CLEAN_DF_UNIQUE_TITLES = "unique_titles_books_summary.csv"  # file handed to get_dataframe
N_RECOMMENDS = 5  # number of books shown per query

# Texts displayed at the top of the Gradio page.
GRADIO_TITLE = "Content Based Book Recommender"
GRADIO_DESCRIPTION = """
This is a [HyDE](https://arxiv.org/abs/2212.10496) based searching mechanism that generates random summaries using your input book title and matches books which has summary similary to generated ones. The books, for search, are used from used [Kaggle Dataset: arpansri/books-summary](https://www.kaggle.com/datasets/arpansri/books-summary)

**Should take ~ 15s to 30s** for inferencing.

## Is it slow 🐢? (Happens in free HF space)
Cold starting in HF space can lead to model file reloading. The entire process will lasts 300s  and **decreases to 15s when you have made sufficiently many ~10 to 15 calls**
"""

# Heavy resources are created inside the gr.NO_RELOAD guard so that gradio's
# reload mode does not rebuild them on every source change.
# Reference: https://www.gradio.app/guides/developing-faster-with-reload-mode
if gr.NO_RELOAD:
    # Book store that recommendations are looked up in
    books_df = get_dataframe(CLEAN_DF_UNIQUE_TITLES)

    # Text-generation pipeline used to produce the hypothetical summaries
    generator_model = pipeline(task="text-generation", model=TRAINED_CASUAL_MODEL)

    # Sentence embedding model used for the similarity computation
    emb_model = SentenceTransformer(EMB_MODEL)

def _render_cards_html(books: list, summaries: list) -> str:
    '''Builds the card-style HTML showing each book name with its summary.

    Book names and summaries are dataset text, not markup, so they are
    HTML-escaped before interpolation; characters such as `<`, `>` or `&`
    are then displayed literally instead of breaking (or injecting into) the page.

    Args:
        books: book names, one per card
        summaries: summaries, paired positionally with `books`

    Returns
     a single HTML string: a flex container holding one card per book
    '''
    from html import escape  # local import keeps this block self-contained

    cards = [
        f"""
        <div style='border: 1px solid #ddd; border-radius: 8px; padding: 1rem; width: 200px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);'>
            <h3 style='margin: 0;'>{escape(str(book))}</h3>
            <p style='font-size: 0.9rem; color: #555;'>{escape(str(summary))}</p>
        </div>
        """
        for book, summary in zip(books, summaries)
    ]
    return "<div style='display: flex; flex-wrap: wrap; gap: 1rem;'>" + "".join(cards) + "</div>"


def get_recommendation(book_title: str) -> list:
    '''Returns data model suitable to be rendered in the gradio interface.

    Generates hypothetical summaries for the given title, scores them against
    the book store, and formats the top `N_RECOMMENDS` matches.

    Args:
        book_title: the book name you are looking for

    Returns
     list of two values; first value is a dictionary of <book, similarity_score>
     (for gr.Label); second value is the card view as an HTML string (for gr.HTML)
    '''
    # Hypothetical summaries for the title; other parameters use the function defaults
    fake_summaries = generate_summaries(book_title=book_title, n_samples=5, model=generator_model)

    # Compute similarity
    similarity, ranks = computes_similarity_w_hypothetical(hypothetical_summaries=fake_summaries, model=emb_model)

    # `ranks` holds positional indices into both `books_df` and `similarity`
    # (presumably ordered best match first -- confirm in z_similarity).
    # Only the leading N_RECOMMENDS are needed, so slice before indexing
    # instead of reordering the whole dataframe.
    top_ranks = ranks[:N_RECOMMENDS]
    df_top = books_df.iloc[top_ranks]

    # Post-process for gradio interface
    books = df_top["book_name"].to_list()
    summaries = df_top["summaries"].to_list()
    scores = similarity[top_ranks]

    # For gr.Label interface; scores are cast to plain Python floats so the
    # label receives ordinary numbers rather than array scalar types.
    label_similarity: dict = {book: float(score) for book, score in zip(books, scores)}

    # Club the output to be processed by the gradio Interface
    return [label_similarity, _render_cards_html(books, summaries)]

# Input widget: single-line textbox for the book title to search with
input_textbox = gr.Textbox(
    label="Search for book with name similary to",
    placeholder="Rich Dad Poor Dad",
    max_lines=1,
)

# Output widgets, listed in the same order as the values returned by
# get_recommendation: the similarity label first, then the HTML cards
output = [
    gr.Label(label="Similar Books"),
    gr.HTML(label="Books Descriptions", show_label=True),
]

# Stitch the interface together
demo = gr.Interface(
    title=GRADIO_TITLE,
    description=GRADIO_DESCRIPTION,
    fn=get_recommendation,
    inputs=input_textbox,
    outputs=output,
)

# Start the app (with a share link requested)
demo.launch(share=True)