Commit ba0a0d3
Deepak Sahu committed
1 Parent(s): 219a526

speedup workaround 2
Files changed:
- app.py (+7 -4)
- z_embedding.py (+7 -9)
- z_similarity.py (+5 -3)
app.py CHANGED

@@ -1,7 +1,9 @@
 from z_utils import get_dataframe
 import gradio as gr
 from z_hypothetical_summary import generate_summaries
+from z_similarity import computes_similarity_w_hypothetical
 from transformers import pipeline, set_seed
+from sentence_transformers import SentenceTransformer
 
 
 # CONST
@@ -9,7 +11,7 @@ CLEAN_DF_UNIQUE_TITLES = "unique_titles_books_summary.csv"
 N_RECOMMENDS = 5
 set_seed(42)
 TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
-
+EMB_MODEL = "all-MiniLM-L6-v2"
 
 if gr.NO_RELOAD:
     # Load store books
@@ -18,16 +20,17 @@ if gr.NO_RELOAD:
     # Load generator model
     generator_model = pipeline('text-generation', model=TRAINED_CASUAL_MODEL)
 
-
+    # Load embedding model
+    emb_model = SentenceTransformer(EMB_MODEL)
 
 
 def get_recommendation(book_title: str) -> str:
-    global generator_model
+    global generator_model, emb_model
     # output = generator_model("Love")
     fake_summaries = generate_summaries(book_title=book_title, n_samples=5, model=generator_model) # other parameters are set to default in the function
 
     # Compute Simialrity
-    similarity, ranks = computes_similarity_w_hypothetical(hypothetical_summaries=fake_summaries)
+    similarity, ranks = computes_similarity_w_hypothetical(hypothetical_summaries=fake_summaries, model=emb_model)
 
     # Get ranked Documents
     df_ranked = books_df.iloc[ranks]
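
Note: the app.py change keeps all heavy model construction inside Gradio's `gr.NO_RELOAD` block, so the GPT-2 generator and the sentence-embedding model are built once per process and not again on every hot reload of the file; the embedding model is then passed explicitly into the similarity call. A minimal, self-contained sketch of that pattern is below; the model name and the toy `embed_dim` handler are illustrative placeholders, not part of the commit.

```python
import gradio as gr
from sentence_transformers import SentenceTransformer

if gr.NO_RELOAD:
    # Code in this block is skipped when `gradio app.py` re-executes the file
    # during hot reload, so the model is loaded only once per process.
    emb_model = SentenceTransformer("all-MiniLM-L6-v2")

def embed_dim(text: str) -> str:
    # Encode the input and report the embedding dimension, just to show
    # that the preloaded model is usable from a handler.
    return str(emb_model.encode(text).shape[0])

demo = gr.Interface(fn=embed_dim, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()
```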
z_embedding.py CHANGED

@@ -9,15 +9,13 @@ EMB_MODEL = "all-MiniLM-L6-v2"
 INP_DATASET_CSV = "unique_titles_books_summary.csv"
 CACHE_SUMMARY_EMB_NPY = "app_cache/summary_vectors.npy"
 
-
-# setting this at global level because entire runtime will continue to use this model.
-
-import gradio as gr
-
-if gr.NO_RELOAD: # Required for faster working with HF spaces
-    model = SentenceTransformer(EMB_MODEL)
-
+model = None
 
+def load_model():
+    global model
+    if model is None:
+        model = SentenceTransformer(EMB_MODEL)
+    return model
 
 def dataframe_compute_summary_vector(books_df: pd.DataFrame) -> np.ndarray:
     '''Takes books summaries and compute embedding vectors
@@ -30,7 +28,7 @@ def dataframe_compute_summary_vector(books_df: pd.DataFrame) -> np.ndarray:
     Returns:
         pd.DataFrame: The processed DataFrame with new column `vector`
     '''
-
+    model = load_model()
 
     if 'summaries' not in books_df.columns:
         raise ValueError("DataFrame must contain 'summaries' columns.")
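
Note: in z_embedding.py the commit replaces the import-time `SentenceTransformer` construction with a lazy `load_model()` helper, so merely importing the module no longer pays the model-loading cost; the model is created once, on first use, and reused afterwards. A minimal sketch of that lazy-singleton pattern follows, assuming only `sentence-transformers` is installed (the `_model` name and `name` parameter are mine, not the commit's).

```python
from sentence_transformers import SentenceTransformer

_model = None  # populated on first use

def load_model(name: str = "all-MiniLM-L6-v2") -> SentenceTransformer:
    """Return a process-wide SentenceTransformer, creating it only once."""
    global _model
    if _model is None:
        _model = SentenceTransformer(name)
    return _model

if __name__ == "__main__":
    # Both calls return the same instance; only the first call loads the weights.
    assert load_model() is load_model()
```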
z_similarity.py CHANGED

@@ -1,11 +1,11 @@
 from z_utils import load_cache_embeddings
-from z_embedding import
+from z_embedding import load_model, get_embeddings
 import torch
 import numpy as np
 
 books_summaries_embs = load_cache_embeddings()
 
-def computes_similarity_w_hypothetical(hypothetical_summaries: list[str]) -> (np.ndarray, np.ndarray):
+def computes_similarity_w_hypothetical(hypothetical_summaries: list[str], model = None) -> (np.ndarray, np.ndarray):
     '''Computes cosine similarity between stored book_summaries and all hypothetical_summaries
 
     Returns:
@@ -14,7 +14,9 @@ def computes_similarity_w_hypothetical(hypothetical_summaries: list[str]) -> (np.ndarray, np.ndarray):
 
     Ranks of the books summaries based on above consine similarity Distance; Lower ranks means more similar
     '''
-    global books_summaries_embs
+    global books_summaries_embs
+    model = model if model else load_model()
+
     hypothetical_summaries_embs = get_embeddings(hypothetical_summaries)
     similarity: torch.Tensor = model.similarity(books_summaries_embs, hypothetical_summaries_embs)
 
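
Note: the updated `computes_similarity_w_hypothetical` accepts an optional `model` argument (app.py passes its preloaded `emb_model`) and falls back to the lazy loader when none is given, then scores the cached book-summary embeddings against the hypothetical summaries with `SentenceTransformer.similarity`. A hedged, self-contained sketch of that scoring-and-ranking step is below; the toy data is mine, and `similarity()` assumes sentence-transformers >= 3.0.

```python
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")

# Illustrative stand-ins for the cached book-summary embeddings.
book_summaries = ["A wizard attends a magic school.", "A detective solves crimes in London."]
books_summaries_embs = model.encode(book_summaries)

hypothetical_summaries = ["A young sorcerer learns spells at an academy."]
hypothetical_embs = model.encode(hypothetical_summaries)

# similarity() returns a torch tensor of pairwise cosine similarities
# with shape (n_books, n_hypothetical).
similarity = model.similarity(books_summaries_embs, hypothetical_embs)

# Rank books by average similarity to the hypothetical summaries, most similar
# first, mirroring "lower rank means more similar" from the docstring.
ranks = np.argsort(-similarity.mean(dim=1).numpy())
print(ranks)  # e.g. [0 1]: the wizard book is the closest match
```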