from z_utils import get_dataframe import gradio as gr from z_hypothetical_summary import generate_summaries from z_similarity import computes_similarity_w_hypothetical from transformers import pipeline, set_seed from sentence_transformers import SentenceTransformer # CONST CLEAN_DF_UNIQUE_TITLES = "unique_titles_books_summary.csv" N_RECOMMENDS = 5 set_seed(42) TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator" EMB_MODEL = "all-MiniLM-L6-v2" GRADIO_TITLE = "Content Based Book Recommender" GRADIO_DESCRIPTION = ''' This is a [HyDE](https://arxiv.org/abs/2212.10496) based searching mechanism that generates random summaries using your input book title and matches books which has summary similary to generated ones. The books, for search, are used from used [Kaggle Dataset: arpansri/books-summary](https://www.kaggle.com/datasets/arpansri/books-summary) **Should take ~ 15s to 30s** for inferencing. If taking time then then its cold starting in HF space which lasts 300s and **decreases to 15s when you have made sufficiently many ~10 to 15 call** ''' # Caching mechanism for gradio if gr.NO_RELOAD: # Reference: https://www.gradio.app/guides/developing-faster-with-reload-mode # Load store books books_df = get_dataframe(CLEAN_DF_UNIQUE_TITLES) # Load generator model generator_model = pipeline('text-generation', model=TRAINED_CASUAL_MODEL) # Load embedding model emb_model = SentenceTransformer(EMB_MODEL) def get_recommendation(book_title: str) -> list: '''Returns data model suitable to be render in gradio interface; Args: book_title: the book name you are looking for Returns list of two values; firs value is a dictionary of ; Second Value is the card view in html generated form ''' global generator_model, emb_model # output = generator_model("Love") fake_summaries = generate_summaries(book_title=book_title, n_samples=5, model=generator_model) # other parameters are set to default in the function # Compute Simialrity similarity, ranks = computes_similarity_w_hypothetical(hypothetical_summaries=fake_summaries, model=emb_model) # Get ranked Documents df_ranked = books_df.iloc[ranks] df_ranked = df_ranked.reset_index() # post-process for gradio interface books = df_ranked["book_name"].to_list()[:N_RECOMMENDS] summaries = df_ranked["summaries"].to_list()[:N_RECOMMENDS] scores = similarity[ranks][:N_RECOMMENDS] # # For gr.Label interface label_similarity: dict = {book: score for book, score in zip(books, scores)} # # Generate card-style HTML; to render book names and their summaries html = "
" for book, summary in zip(books, summaries): html += f"""

{book}

{summary}

""" html += "
" # Club the output to be processed by gradio INterface response = [label_similarity, html] return response # Input Interface Render input_textbox = gr.Textbox(label="Search for book with name similary to", placeholder="Rich Dad Poor Dad", max_lines=1) # Output Interface Render output = [gr.Label(label="Similar Books"), gr.HTML(label="Books Descriptions", show_label=True)] # Stich interace and run demo = gr.Interface( fn=get_recommendation, inputs=input_textbox, outputs=output, title=GRADIO_TITLE, description=GRADIO_DESCRIPTION ) demo.launch()