Commit ba0a0d3
Deepak Sahu committed
1 Parent(s): 219a526

speedup workaround 2
Files changed:
- app.py (+7 -4)
- z_embedding.py (+7 -9)
- z_similarity.py (+5 -3)
app.py CHANGED

@@ -1,7 +1,9 @@
 from z_utils import get_dataframe
 import gradio as gr
 from z_hypothetical_summary import generate_summaries
+from z_similarity import computes_similarity_w_hypothetical
 from transformers import pipeline, set_seed
+from sentence_transformers import SentenceTransformer
 
 
 # CONST
@@ -9,7 +11,7 @@ CLEAN_DF_UNIQUE_TITLES = "unique_titles_books_summary.csv"
 N_RECOMMENDS = 5
 set_seed(42)
 TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
-
+EMB_MODEL = "all-MiniLM-L6-v2"
 
 if gr.NO_RELOAD:
     # Load store books
@@ -18,16 +20,17 @@ if gr.NO_RELOAD:
     # Load generator model
     generator_model = pipeline('text-generation', model=TRAINED_CASUAL_MODEL)
 
-
+    # Load embedding model
+    emb_model = SentenceTransformer(EMB_MODEL)
 
 
 def get_recommendation(book_title: str) -> str:
-    global generator_model
+    global generator_model, emb_model
     # output = generator_model("Love")
     fake_summaries = generate_summaries(book_title=book_title, n_samples=5, model=generator_model) # other parameters are set to default in the function
 
     # Compute Simialrity
-    similarity, ranks = computes_similarity_w_hypothetical(hypothetical_summaries=fake_summaries)
+    similarity, ranks = computes_similarity_w_hypothetical(hypothetical_summaries=fake_summaries, model=emb_model)
 
     # Get ranked Documents
     df_ranked = books_df.iloc[ranks]
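
Note: the app.py change keeps all heavy model construction inside Gradio's `gr.NO_RELOAD` block, so the GPT-2 generator and the sentence-embedding model are built once per process and not again on every hot reload of the file; the embedding model is then passed explicitly into the similarity call. A minimal, self-contained sketch of that pattern is below; the model name and the toy `embed_dim` handler are illustrative placeholders, not part of the commit.

```python
import gradio as gr
from sentence_transformers import SentenceTransformer

if gr.NO_RELOAD:
    # Code in this block is skipped when `gradio app.py` re-executes the file
    # during hot reload, so the model is loaded only once per process.
    emb_model = SentenceTransformer("all-MiniLM-L6-v2")

def embed_dim(text: str) -> str:
    # Encode the input and report the embedding dimension, just to show
    # that the preloaded model is usable from a handler.
    return str(emb_model.encode(text).shape[0])

demo = gr.Interface(fn=embed_dim, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()
```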
z_embedding.py CHANGED

@@ -9,15 +9,13 @@ EMB_MODEL = "all-MiniLM-L6-v2"
 INP_DATASET_CSV = "unique_titles_books_summary.csv"
 CACHE_SUMMARY_EMB_NPY = "app_cache/summary_vectors.npy"
 
-
-# setting this at global level because entire runtime will continue to use this model.
-
-import gradio as gr
-
-if gr.NO_RELOAD: # Required for faster working with HF spaces
-    model = SentenceTransformer(EMB_MODEL)
-
+model = None
 
+def load_model():
+    global model
+    if model is None:
+        model = SentenceTransformer(EMB_MODEL)
+    return model
 
 def dataframe_compute_summary_vector(books_df: pd.DataFrame) -> np.ndarray:
     '''Takes books summaries and compute embedding vectors
@@ -30,7 +28,7 @@ def dataframe_compute_summary_vector(books_df: pd.DataFrame) -> np.ndarray:
     Returns:
         pd.DataFrame: The processed DataFrame with new column `vector`
     '''
-
+    model = load_model()
 
     if 'summaries' not in books_df.columns:
         raise ValueError("DataFrame must contain 'summaries' columns.")
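
Note: in z_embedding.py the commit replaces the import-time `SentenceTransformer` construction with a lazy `load_model()` helper, so merely importing the module no longer pays the model-loading cost; the model is created once, on first use, and reused afterwards. A minimal sketch of that lazy-singleton pattern follows, assuming only `sentence-transformers` is installed (the `_model` name and `name` parameter are mine, not the commit's).

```python
from sentence_transformers import SentenceTransformer

_model = None  # populated on first use

def load_model(name: str = "all-MiniLM-L6-v2") -> SentenceTransformer:
    """Return a process-wide SentenceTransformer, creating it only once."""
    global _model
    if _model is None:
        _model = SentenceTransformer(name)
    return _model

if __name__ == "__main__":
    # Both calls return the same instance; only the first call loads the weights.
    assert load_model() is load_model()
```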
z_similarity.py CHANGED

@@ -1,11 +1,11 @@
 from z_utils import load_cache_embeddings
-from z_embedding import
+from z_embedding import load_model, get_embeddings
 import torch
 import numpy as np
 
 books_summaries_embs = load_cache_embeddings()
 
-def computes_similarity_w_hypothetical(hypothetical_summaries: list[str]) -> (np.ndarray, np.ndarray):
+def computes_similarity_w_hypothetical(hypothetical_summaries: list[str], model = None) -> (np.ndarray, np.ndarray):
     '''Computes cosine similarity between stored book_summaries and all hypothetical_summaries
 
     Returns:
@@ -14,7 +14,9 @@ def computes_similarity_w_hypothetical(hypothetical_summaries: list[str]) -> (np.ndarray, np.ndarray):
 
     Ranks of the books summaries based on above consine similarity Distance; Lower ranks means more similar
     '''
-    global books_summaries_embs
+    global books_summaries_embs
+    model = model if model else load_model()
+
     hypothetical_summaries_embs = get_embeddings(hypothetical_summaries)
     similarity: torch.Tensor = model.similarity(books_summaries_embs, hypothetical_summaries_embs)
 
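
Note: the updated `computes_similarity_w_hypothetical` accepts an optional `model` argument (app.py passes its preloaded `emb_model`) and falls back to the lazy loader when none is given, then scores the cached book-summary embeddings against the hypothetical summaries with `SentenceTransformer.similarity`. A hedged, self-contained sketch of that scoring-and-ranking step is below; the toy data is mine, and `similarity()` assumes sentence-transformers >= 3.0.

```python
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")

# Illustrative stand-ins for the cached book-summary embeddings.
book_summaries = ["A wizard attends a magic school.", "A detective solves crimes in London."]
books_summaries_embs = model.encode(book_summaries)

hypothetical_summaries = ["A young sorcerer learns spells at an academy."]
hypothetical_embs = model.encode(hypothetical_summaries)

# similarity() returns a torch tensor of pairwise cosine similarities
# with shape (n_books, n_hypothetical).
similarity = model.similarity(books_summaries_embs, hypothetical_embs)

# Rank books by average similarity to the hypothetical summaries, most similar
# first, mirroring "lower rank means more similar" from the docstring.
ranks = np.argsort(-similarity.mean(dim=1).numpy())
print(ranks)  # e.g. [0 1]: the wizard book is the closest match
```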