Deepak Sahu
section update
abf887e
raw
history blame
3.72 kB
from z_utils import get_dataframe
import gradio as gr
from z_hypothetical_summary import generate_summaries
from z_similarity import computes_similarity_w_hypothetical
from transformers import pipeline, set_seed
from sentence_transformers import SentenceTransformer
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# CSV passed to get_dataframe() to load the searchable books
CLEAN_DF_UNIQUE_TITLES = "unique_titles_books_summary.csv"
# Number of books returned per query
N_RECOMMENDS = 5
# Model id for the text-generation pipeline that writes hypothetical summaries
TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
# SentenceTransformer model name used for embeddings
EMB_MODEL = "all-MiniLM-L6-v2"
# Texts shown at the top of the Gradio page
GRADIO_TITLE = "Content Based Book Recommender"
GRADIO_DESCRIPTION = '''
This is a [HyDE](https://arxiv.org/abs/2212.10496) based searching mechanism that generates random summaries using your input book title and matches books which has summary similary to generated ones. The books, for search, are used from used [Kaggle Dataset: arpansri/books-summary](https://www.kaggle.com/datasets/arpansri/books-summary)
**Should take ~ 15s to 30s** for inferencing. If taking time then then its cold starting in HF space which lasts 300s and **decreases to 15s when you have made sufficiently many ~10 to 15 call**
'''
# Fix the random seed (transformers.set_seed) before any model is created
set_seed(42)
# Heavy resources are created under the gr.NO_RELOAD guard, which gradio
# provides as a caching mechanism for its reload mode.
# Reference: https://www.gradio.app/guides/developing-faster-with-reload-mode
if gr.NO_RELOAD:
    # Stored books that recommendations are picked from
    books_df = get_dataframe(CLEAN_DF_UNIQUE_TITLES)

    # Generator used to produce hypothetical summaries for a title
    generator_model = pipeline(
        "text-generation",
        model=TRAINED_CASUAL_MODEL,
    )

    # Embedding model handed to the similarity computation
    emb_model = SentenceTransformer(EMB_MODEL)
def _render_cards_html(books, summaries) -> str:
    '''Build the card-style HTML that lists each book name with its summary.

    Book names and summaries come from the dataset, so they are HTML-escaped
    before being placed in the markup; otherwise characters such as ``<`` or
    ``&`` in a summary would corrupt (or inject into) the rendered page.
    '''
    # Local import keeps the file's top-level import block untouched
    from html import escape

    cards = [
        "\n"
        "<div style='border: 1px solid #ddd; border-radius: 8px; padding: 1rem; width: 200px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);'>\n"
        # str() keeps non-string cells (e.g. a missing summary) renderable
        f"<h3 style='margin: 0;'>{escape(str(book))}</h3>\n"
        f"<p style='font-size: 0.9rem; color: #555;'>{escape(str(summary))}</p>\n"
        "</div>\n"
        for book, summary in zip(books, summaries)
    ]
    return (
        "<div style='display: flex; flex-wrap: wrap; gap: 1rem;'>"
        + "".join(cards)
        + "</div>"
    )


def get_recommendation(book_title: str) -> list:
    '''Returns data model suitable to be rendered in the gradio interface.

    Hypothetical summaries are generated for ``book_title``, compared against
    the stored books, and the first ``N_RECOMMENDS`` ranked books are
    formatted for the two output components.

    Args:
        book_title: the book name you are looking for

    Returns:
        list of two values; first value is a dictionary of
        <book, similarity_score> (for gr.Label); second value is the card
        view of book names and summaries as an HTML string (for gr.HTML)
    '''
    # Module-level books_df / generator_model / emb_model are only read here,
    # so no ``global`` declaration is needed.
    # Other parameters are left at the defaults set in generate_summaries
    fake_summaries = generate_summaries(book_title=book_title, n_samples=5, model=generator_model)

    # Compute similarity; ``ranks`` is used below as positional indices into
    # both ``books_df`` and ``similarity``.
    # NOTE(review): presumably ordered best match first -- confirm in z_similarity
    similarity, ranks = computes_similarity_w_hypothetical(hypothetical_summaries=fake_summaries, model=emb_model)

    # Keep only the leading ranks before indexing, so the whole dataframe is
    # not reordered on every request (same rows as slicing afterwards)
    top_ranks = ranks[:N_RECOMMENDS]
    df_top = books_df.iloc[top_ranks]
    books = df_top["book_name"].to_list()
    summaries = df_top["summaries"].to_list()
    scores = similarity[top_ranks]

    # For gr.Label interface; builtin floats keep the payload plain regardless
    # of the array type the similarity helper returns
    label_similarity: dict = {book: float(score) for book, score in zip(books, scores)}

    # Card-style HTML to render book names and their summaries
    cards_html = _render_cards_html(books, summaries)

    # Club the output to be processed by gradio Interface
    return [label_similarity, cards_html]
# Input component: a single-line text box for the book title
input_textbox = gr.Textbox(
    label="Search for book with name similary to",
    placeholder="Rich Dad Poor Dad",
    max_lines=1,
)

# Output components, in the same order as the list get_recommendation returns
output = [
    gr.Label(label="Similar Books"),
    gr.HTML(label="Books Descriptions", show_label=True),
]

# Stitch the interface together and start serving it
demo = gr.Interface(
    fn=get_recommendation,
    inputs=input_textbox,
    outputs=output,
    title=GRADIO_TITLE,
    description=GRADIO_DESCRIPTION,
)
demo.launch()