Deepak Sahu committed on
Commit
ba0a0d3
·
1 Parent(s): 219a526

speedup workaround 2

Browse files
Files changed (3) hide show
  1. app.py +7 -4
  2. z_embedding.py +7 -9
  3. z_similarity.py +5 -3
app.py CHANGED
@@ -1,7 +1,9 @@
1
  from z_utils import get_dataframe
2
  import gradio as gr
3
  from z_hypothetical_summary import generate_summaries
 
4
  from transformers import pipeline, set_seed
 
5
 
6
 
7
  # CONST
@@ -9,7 +11,7 @@ CLEAN_DF_UNIQUE_TITLES = "unique_titles_books_summary.csv"
9
  N_RECOMMENDS = 5
10
  set_seed(42)
11
  TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
12
-
13
 
14
  if gr.NO_RELOAD:
15
  # Load store books
@@ -18,16 +20,17 @@ if gr.NO_RELOAD:
18
  # Load generator model
19
  generator_model = pipeline('text-generation', model=TRAINED_CASUAL_MODEL)
20
 
21
- from z_similarity import computes_similarity_w_hypothetical
 
22
 
23
 
24
  def get_recommendation(book_title: str) -> str:
25
- global generator_model
26
  # output = generator_model("Love")
27
  fake_summaries = generate_summaries(book_title=book_title, n_samples=5, model=generator_model) # other parameters are set to default in the function
28
 
29
  # Compute Similarity
30
- similarity, ranks = computes_similarity_w_hypothetical(hypothetical_summaries=fake_summaries)
31
 
32
  # Get ranked Documents
33
  df_ranked = books_df.iloc[ranks]
 
1
  from z_utils import get_dataframe
2
  import gradio as gr
3
  from z_hypothetical_summary import generate_summaries
4
+ from z_similarity import computes_similarity_w_hypothetical
5
  from transformers import pipeline, set_seed
6
+ from sentence_transformers import SentenceTransformer
7
 
8
 
9
  # CONST
 
11
  N_RECOMMENDS = 5
12
  set_seed(42)
13
  TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
14
+ EMB_MODEL = "all-MiniLM-L6-v2"
15
 
16
  if gr.NO_RELOAD:
17
  # Load store books
 
20
  # Load generator model
21
  generator_model = pipeline('text-generation', model=TRAINED_CASUAL_MODEL)
22
 
23
+ # Load embedding model
24
+ emb_model = SentenceTransformer(EMB_MODEL)
25
 
26
 
27
  def get_recommendation(book_title: str) -> str:
28
+ global generator_model, emb_model
29
  # output = generator_model("Love")
30
  fake_summaries = generate_summaries(book_title=book_title, n_samples=5, model=generator_model) # other parameters are set to default in the function
31
 
32
  # Compute Similarity
33
+ similarity, ranks = computes_similarity_w_hypothetical(hypothetical_summaries=fake_summaries, model=emb_model)
34
 
35
  # Get ranked Documents
36
  df_ranked = books_df.iloc[ranks]
z_embedding.py CHANGED
@@ -9,15 +9,13 @@ EMB_MODEL = "all-MiniLM-L6-v2"
9
  INP_DATASET_CSV = "unique_titles_books_summary.csv"
10
  CACHE_SUMMARY_EMB_NPY = "app_cache/summary_vectors.npy"
11
 
12
- # Load Model
13
- # setting this at global level because entire runtime will continue to use this model.
14
-
15
- import gradio as gr
16
-
17
- if gr.NO_RELOAD: # Required for faster working with HF spaces
18
- model = SentenceTransformer(EMB_MODEL)
19
-
20
 
 
 
 
 
 
21
 
22
  def dataframe_compute_summary_vector(books_df: pd.DataFrame) -> np.ndarray:
23
  '''Takes books summaries and compute embedding vectors
@@ -30,7 +28,7 @@ def dataframe_compute_summary_vector(books_df: pd.DataFrame) -> np.ndarray:
30
  Returns:
31
  pd.DataFrame: The processed DataFrame with new column `vector`
32
  '''
33
- global model
34
 
35
  if 'summaries' not in books_df.columns:
36
  raise ValueError("DataFrame must contain 'summaries' columns.")
 
9
  INP_DATASET_CSV = "unique_titles_books_summary.csv"
10
  CACHE_SUMMARY_EMB_NPY = "app_cache/summary_vectors.npy"
11
 
12
+ model = None
 
 
 
 
 
 
 
13
 
14
+ def load_model():
15
+ global model
16
+ if model is None:
17
+ model = SentenceTransformer(EMB_MODEL)
18
+ return model
19
 
20
  def dataframe_compute_summary_vector(books_df: pd.DataFrame) -> np.ndarray:
21
  '''Takes books summaries and compute embedding vectors
 
28
  Returns:
29
  pd.DataFrame: The processed DataFrame with new column `vector`
30
  '''
31
+ model = load_model()
32
 
33
  if 'summaries' not in books_df.columns:
34
  raise ValueError("DataFrame must contain 'summaries' columns.")
z_similarity.py CHANGED
@@ -1,11 +1,11 @@
1
  from z_utils import load_cache_embeddings
2
- from z_embedding import model, get_embeddings
3
  import torch
4
  import numpy as np
5
 
6
  books_summaries_embs = load_cache_embeddings()
7
 
8
- def computes_similarity_w_hypothetical(hypothetical_summaries: list[str]) -> (np.ndarray, np.ndarray):
9
  '''Computes cosine similarity between stored book_summaries and all hypothetical_summaries
10
 
11
  Returns:
@@ -14,7 +14,9 @@ def computes_similarity_w_hypothetical(hypothetical_summaries: list[str]) -> (np
14
 
15
  Ranks of the book summaries based on the above cosine similarity distance; a lower rank means more similar
16
  '''
17
- global books_summaries_embs, model
 
 
18
  hypothetical_summaries_embs = get_embeddings(hypothetical_summaries)
19
  similarity: torch.Tensor = model.similarity(books_summaries_embs, hypothetical_summaries_embs)
20
 
 
1
  from z_utils import load_cache_embeddings
2
+ from z_embedding import load_model, get_embeddings
3
  import torch
4
  import numpy as np
5
 
6
  books_summaries_embs = load_cache_embeddings()
7
 
8
+ def computes_similarity_w_hypothetical(hypothetical_summaries: list[str], model = None) -> (np.ndarray, np.ndarray):
9
  '''Computes cosine similarity between stored book_summaries and all hypothetical_summaries
10
 
11
  Returns:
 
14
 
15
  Ranks of the book summaries based on the above cosine similarity distance; a lower rank means more similar
16
  '''
17
+ global books_summaries_embs
18
+ model = model if model else load_model()
19
+
20
  hypothetical_summaries_embs = get_embeddings(hypothetical_summaries)
21
  similarity: torch.Tensor = model.similarity(books_summaries_embs, hypothetical_summaries_embs)
22