Update app.py
app.py
CHANGED
@@ -5,6 +5,7 @@ from typing import List, Dict, Tuple
 import re
 import os
 import torch
+from math import ceil
 
 # Load the RAG model and tokenizer
 rag_tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
@@ -22,6 +23,12 @@ doc_chunks = re.split(split_pattern, pdf_text)
 # Preprocess the corpus
 corpus = rag_tokenizer(doc_chunks, return_tensors="pt", padding=True, truncation=True).input_ids
 
+# Pad the corpus to be a multiple of `n_docs`
+n_docs = rag_model.config.n_docs
+corpus_length = corpus.size(-1)
+pad_length = ceil(corpus_length / n_docs) * n_docs - corpus_length
+corpus = torch.nn.functional.pad(corpus, (0, pad_length), mode='constant', value=rag_tokenizer.pad_token_id)
+
 """
 For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
|