Spaces:
Running
Running
File size: 3,806 Bytes
37f6f60 92c0891 89ba56c ba0a0d3 d6ff5a1 ba0a0d3 d6ff5a1 37f6f60 9062ce3 d6ff5a1 37f6f60 ba0a0d3 6795e5d bafb8fc 0720e54 6795e5d 37f6f60 bafb8fc d6ff5a1 37f6f60 69b5f29 ba0a0d3 3abee27 6795e5d 9062ce3 6795e5d ba0a0d3 6795e5d 1656c60 b1228d0 9062ce3 ba0a0d3 d6ff5a1 9062ce3 6795e5d ebf589b e77e4c7 9062ce3 6795e5d 38d1ee1 6795e5d b2ed780 acffe44 b2ed780 6795e5d acffe44 b2ed780 6795e5d 483f6d3 6795e5d 9eb52f8 6795e5d 3abee27 bd396f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
from z_utils import get_dataframe
import gradio as gr
from z_hypothetical_summary import generate_summaries
from z_similarity import computes_similarity_w_hypothetical
from transformers import pipeline, set_seed
from sentence_transformers import SentenceTransformer
# ---------------------------------------------------------------------------
# Configuration constants
# ---------------------------------------------------------------------------
# File handed to get_dataframe() to load the searchable books.
CLEAN_DF_UNIQUE_TITLES = "unique_titles_books_summary.csv"
# How many ranked books are returned to the UI per query.
N_RECOMMENDS = 5

# Model id given to the transformers text-generation pipeline.
TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
# Model name given to SentenceTransformer for embedding.
EMB_MODEL = "all-MiniLM-L6-v2"

# Page title and (Markdown-formatted) description passed to gr.Interface.
GRADIO_TITLE = "Content Based Book Recommender"
GRADIO_DESCRIPTION = """
This is a [HyDE](https://arxiv.org/abs/2212.10496) based searching mechanism that generates random summaries using your input book title and matches books which has summary similary to generated ones. The books, for search, are used from used [Kaggle Dataset: arpansri/books-summary](https://www.kaggle.com/datasets/arpansri/books-summary)
**Should take ~ 15s to 30s** for inferencing.
## Is it slow 🐢? (Happens in free HF space)
Cold starting in HF space can lead to model file reloading. The entire process will lasts 300s and **decreases to 15s when you have made sufficiently many ~10 to 15 calls**
"""

# Seed the transformers RNGs once, before any model is created or sampled.
set_seed(42)
# Expensive resources are created under gradio's NO_RELOAD guard.
# Reference: https://www.gradio.app/guides/developing-faster-with-reload-mode
if gr.NO_RELOAD:
    # Books that can be recommended
    books_df = get_dataframe(CLEAN_DF_UNIQUE_TITLES)

    # Text-generation pipeline used to write the hypothetical summaries
    generator_model = pipeline(task="text-generation", model=TRAINED_CASUAL_MODEL)

    # Sentence embedding model used for the similarity computation
    emb_model = SentenceTransformer(EMB_MODEL)
def _render_book_card(book, summary) -> str:
    """Return the HTML card for one book, with its text HTML-escaped."""
    # Local import keeps this block self-contained; the values come from the
    # dataset, so they are escaped before being embedded in markup.
    from html import escape

    return (
        "\n"
        "<div style='border: 1px solid #ddd; border-radius: 8px; padding: 1rem; width: 200px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);'>\n"
        f"<h3 style='margin: 0;'>{escape(str(book))}</h3>\n"
        f"<p style='font-size: 0.9rem; color: #555;'>{escape(str(summary))}</p>\n"
        "</div>\n"
    )


def get_recommendation(book_title: str) -> list:
    """Return the data needed to render recommendations in the gradio interface.

    Hypothetical summaries are generated for ``book_title``, compared against
    the stored books, and the ``N_RECOMMENDS`` best-ranked books are returned.

    Args:
        book_title: the book name you are looking for.

    Returns:
        A list of two values: first a dictionary of
        ``<book, similarity_score>`` (for ``gr.Label``); second the card view
        of the same books and their summaries as an HTML string (for
        ``gr.HTML``).
    """
    # Generate hypothetical summaries; other parameters of generate_summaries
    # are left at their defaults.
    fake_summaries = generate_summaries(book_title=book_title, n_samples=5, model=generator_model)

    # Compute similarity
    similarity, ranks = computes_similarity_w_hypothetical(hypothetical_summaries=fake_summaries, model=emb_model)

    # Keep only the leading ranks before touching the dataframe, so just
    # N_RECOMMENDS rows are materialised instead of the whole corpus.
    # NOTE(review): assumes `ranks` is a 1-D, best-first sequence of row
    # positions, as the original `books_df.iloc[ranks]` + `[:N]` implied.
    top_ranks = ranks[:N_RECOMMENDS]
    top_books = books_df.iloc[top_ranks]
    books = top_books["book_name"].to_list()
    summaries = top_books["summaries"].to_list()
    scores = similarity[top_ranks]

    # For the gr.Label interface
    label_similarity: dict = dict(zip(books, scores))

    # Card-style HTML rendering book names and their summaries
    cards = "".join(
        _render_book_card(book, summary) for book, summary in zip(books, summaries)
    )
    cards_html = (
        "<div style='display: flex; flex-wrap: wrap; gap: 1rem;'>"
        + cards
        + "</div>"
    )

    # Order matches the `outputs` list given to gr.Interface
    return [label_similarity, cards_html]
# Input widget: single-line text box for the book title
input_textbox = gr.Textbox(
    label="Search for book with name similary to",
    placeholder="Rich Dad Poor Dad",
    max_lines=1,
)

# Output widgets, in the order get_recommendation returns its values
output = [
    gr.Label(label="Similar Books"),
    gr.HTML(label="Books Descriptions", show_label=True),
]

# Stitch the interface together and run it
demo = gr.Interface(
    title=GRADIO_TITLE,
    description=GRADIO_DESCRIPTION,
    fn=get_recommendation,
    inputs=input_textbox,
    outputs=output,
)
demo.launch(share=True)
|