Deepak Sahu committed
Commit f2b9b39 · 1 Parent(s): e446a52

code cleanup

Files changed (3):
  1. README.md +2 -2
  2. z_hypothetical_summary.py +7 -8
  3. z_similarity.py +2 -0
README.md CHANGED
@@ -216,9 +216,9 @@ Before discussing the evaluation metric, let me walk you through two important pieces
 
 ### Recommendation Generation
 
-The generation is handled by the script `z_hypothetical_summary.py`. Under the hood, the following happens:
+The generation is handled by functions in the script `z_hypothetical_summary.py`. Under the hood, the following happens:
 
-![image](https://github.com/user-attachments/assets/ee174c38-a1f3-438a-afb8-be2888c590da)
+![image](.resources/eval1.png)
 
 Code Preview. I did minimal post-processing to chop the `prompt` off the generated summaries before returning the result.
 
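The "chop the `prompt` off" post-processing mentioned above can be as small as slicing the echoed prompt from each generated string. A minimal sketch of that idea; `strip_prompt` and the sample strings are illustrative, not code from this repo:

```python
# Illustrative sketch: GPT-2 style text-generation pipelines echo the prompt
# back at the start of each output, so the summary is whatever follows it.
def strip_prompt(generated_text: str, prompt: str) -> str:
    if generated_text.startswith(prompt):
        return generated_text[len(prompt):].strip()
    return generated_text.strip()

prompt = "Book Title: The Hobbit\nBook Summary:"
outputs = [prompt + " A homebody hobbit is swept into a quest for a dragon's treasure."]
print([strip_prompt(o, prompt) for o in outputs])
# ["A homebody hobbit is swept into a quest for a dragon's treasure."]
```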
z_hypothetical_summary.py CHANGED
@@ -11,8 +11,11 @@ TRAINED_CASUAL_MODEL = "LunaticMaestro/gpt2-book-summary-generator"
 generator_model = None
 
 def load_model():
+    '''Workaround to speed up HF cross-script loading: build the pipeline once, then reuse it.'''
     global generator_model
-    generator_model = pipeline('text-generation', model=TRAINED_CASUAL_MODEL)
+    if generator_model is None:
+        generator_model = pipeline('text-generation', model=TRAINED_CASUAL_MODEL)
+    return generator_model
 
 
 def generate_summaries(book_title: str, genre: Optional[str] = None, n_samples=2, top_k=50, top_p=0.85, model=None) -> list[str]:
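The rewritten `load_model` is a lazy singleton: the pipeline is built on the first call and the cached instance is returned afterwards. A self-contained sketch of the pattern, with a stub standing in for the real HF pipeline so it runs without downloading the model:

```python
# Lazy-singleton sketch; object() stands in for
# pipeline('text-generation', model=TRAINED_CASUAL_MODEL).
generator_model = None

def load_model():
    '''Build the expensive object once, then return the cached instance.'''
    global generator_model
    if generator_model is None:
        generator_model = object()  # expensive construction happens only here
    return generator_model

assert load_model() is load_model()  # repeated calls reuse the same instance
```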
 
@@ -23,17 +27,13 @@ def generate_summaries(book_title: str, genre: Optional[str] = None, n_samples=2
         n_samples: (default=2) count of hypothetical summaries
         top_k: (default=50)
         top_p: (default=0.85)
-
         model: CAUSAL LM; this is a hack to allow a faster response in Gradio
+
     Returns:
         summaries: list of hypothetical summaries.
     '''
-    global generator_model
-
-    if model:
-        generator_model = model
-    else:
-        generator_model = generator_model if generator_model is not None else load_model()
+    # select model: prefer the caller-supplied pipeline, else fall back to the cached one
+    generator_model = model if model else load_model()
 
     # basic prompt, very similar to the one used in fine-tuning
     prompt = f'''Book Title: {book_title}
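A hedged usage sketch of the call pattern this hunk implies: Gradio can pass a pre-warmed pipeline via `model` (the fast path), while standalone callers can omit it and rely on the `load_model()` fallback. The book title is an example, not from the repo:

```python
# Assumed usage, not part of the commit.
from z_hypothetical_summary import generate_summaries, load_model

pipe = load_model()  # warm the pipeline once, e.g. at Gradio startup

# Fast path: reuse the preloaded pipeline across requests.
summaries = generate_summaries("The Hobbit", n_samples=2, top_k=50, top_p=0.85, model=pipe)

# Fallback path: the function loads (or reuses) the cached pipeline itself.
summaries = generate_summaries("The Hobbit")
print(len(summaries))  # 2 by default
```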
z_similarity.py CHANGED
@@ -15,6 +15,8 @@ def computes_similarity_w_hypothetical(hypothetical_summaries: list[str], model
15
  Ranks of the books summaries based on above consine similarity Distance; Lower ranks means more similar
16
  '''
17
  global books_summaries_embs
 
 
18
  model = model if model else load_model()
19
 
20
  hypothetical_summaries_embs = get_embeddings(hypothetical_summaries)
 
15
  Ranks of the books summaries based on above consine similarity Distance; Lower ranks means more similar
16
  '''
17
  global books_summaries_embs
18
+
19
+ # Select model
20
  model = model if model else load_model()
21
 
22
  hypothetical_summaries_embs = get_embeddings(hypothetical_summaries)
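For the ranking the docstring describes, a minimal sketch: cosine similarity between hypothetical-summary embeddings and the stored book-summary embeddings, where a lower rank means a closer match. The toy vectors and numpy-only implementation are assumptions, not the repo's `get_embeddings` output:

```python
import numpy as np

# Toy stand-ins for embedding matrices; rows are L2-normalised vectors,
# so a plain dot product equals cosine similarity.
hypothetical_embs = np.array([[1.0, 0.0], [0.8, 0.6]])      # 2 hypothetical summaries
book_embs = np.array([[1.0, 0.0], [0.0, 1.0], [0.6, 0.8]])  # 3 book summaries

sims = hypothetical_embs @ book_embs.T      # (n_hypothetical, n_books) cosine similarities
mean_sims = sims.mean(axis=0)               # aggregate across hypothetical summaries
ranks = np.argsort(np.argsort(-mean_sims))  # rank 0 = most similar book
print(ranks)  # [0 2 1] -> book 0 is the closest match
```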