pszemraj committed
Commit 9350787 • 1 Parent(s): 87e5c9c

🚧 update for longt5

Signed-off-by: peter szemraj <[email protected]>

Files changed (1):
  1. summarize.py  +23 -13
summarize.py CHANGED

@@ -15,20 +15,19 @@ def load_model_and_tokenizer(model_name):
         AutoModelForSeq2SeqLM: the model
         AutoTokenizer: the tokenizer
     """
-
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     model = AutoModelForSeq2SeqLM.from_pretrained(
         model_name,
         # low_cpu_mem_usage=True,
         # use_cache=False,
-    )
+    ).to(device)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = model.to("cuda") if torch.cuda.is_available() else model

-    logging.info(f"Loaded model {model_name}")
+    logging.info(f"Loaded model {model_name} to {device}")
     return model, tokenizer


-def summarize_and_score(ids, mask, model, tokenizer, **kwargs):
+def summarize_and_score(ids, mask, model, tokenizer, is_general_attention_model=True, **kwargs):
     """
     summarize_and_score - given a batch of ids and a mask, return a summary and a score for the summary

@@ -37,6 +36,7 @@ def summarize_and_score(ids, mask, model, tokenizer, **kwargs):
         mask (): the attention mask for the batch
         model (): the model to use for summarization
         tokenizer (): the tokenizer to use for summarization
+        is_general_attention_model (bool, optional): whether the model is a general attention model. Defaults to True.

     Returns:
         str: the summary of the batch

@@ -52,14 +52,23 @@ def summarize_and_score(ids, mask, model, tokenizer, **kwargs):
     # put global attention on <s> token
     global_attention_mask[:, 0] = 1

-    summary_pred_ids = model.generate(
-        input_ids,
-        attention_mask=attention_mask,
-        global_attention_mask=global_attention_mask,
-        output_scores=True,
-        return_dict_in_generate=True,
-        **kwargs,
-    )
+    if is_general_attention_model:
+        summary_pred_ids = model.generate(
+            input_ids,
+            attention_mask=attention_mask,
+            output_scores=True,
+            return_dict_in_generate=True,
+            **kwargs,
+        )
+    else:
+        summary_pred_ids = model.generate(
+            input_ids,
+            attention_mask=attention_mask,
+            global_attention_mask=global_attention_mask,
+            output_scores=True,
+            return_dict_in_generate=True,
+            **kwargs,
+        )
     summary = tokenizer.batch_decode(
         summary_pred_ids.sequences,
         skip_special_tokens=True,

@@ -70,6 +79,7 @@ def summarize_and_score(ids, mask, model, tokenizer, **kwargs):
     return summary, score


+
 def summarize_via_tokenbatches(
     input_text: str,
     model,
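
For context, a minimal usage sketch of the two updated functions (not part of the commit). It assumes summarize.py's own imports (torch, logging, transformers) and an illustrative model name; any seq2seq summarization checkpoint would do. With a LongT5-style model, the default is_general_attention_model=True takes the new generate() branch that omits global_attention_mask; for an LED-style model, which expects global attention on the <s> token, it would be set to False.

# usage sketch (not part of this commit); names below are illustrative
from summarize import load_model_and_tokenizer, summarize_and_score

# load_model_and_tokenizer now moves the model to CUDA if available
model, tokenizer = load_model_and_tokenizer(
    "pszemraj/long-t5-tglobal-base-16384-book-summary"  # assumed checkpoint
)

inputs = tokenizer("some long document ...", return_tensors="pt", truncation=True)
inputs = inputs.to(model.device)  # keep inputs on the same device as the model

# LongT5 is a "general attention" model, so no global_attention_mask is passed
summary, score = summarize_and_score(
    ids=inputs["input_ids"],
    mask=inputs["attention_mask"],
    model=model,
    tokenizer=tokenizer,
    is_general_attention_model=True,  # set False for LED-style models
    max_length=256,  # extra kwargs are forwarded to model.generate()
    num_beams=4,
)
print(summary, score)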