Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -19,8 +19,8 @@ with open(pdf_path, 'rb') as f:
|
|
19 |
split_pattern = r'\n\n'
|
20 |
doc_chunks = re.split(split_pattern, pdf_text)
|
21 |
|
22 |
-
#
|
23 |
-
corpus =
|
24 |
|
25 |
"""
|
26 |
For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
@@ -46,11 +46,20 @@ def respond(
|
|
46 |
|
47 |
messages.append({"role": "user", "content": message})
|
48 |
|
49 |
-
# Tokenize the input
|
50 |
inputs = rag_tokenizer(message, return_tensors="pt")
|
51 |
-
inputs.update({"corpus": corpus})
|
52 |
input_ids = inputs.pop("input_ids")
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
retrieved_context = rag_tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
|
55 |
|
56 |
response = ""
|
|
|
19 |
split_pattern = r'\n\n'
|
20 |
doc_chunks = re.split(split_pattern, pdf_text)
|
21 |
|
22 |
+
# Preprocess the corpus
|
23 |
+
corpus = rag_tokenizer(doc_chunks, return_tensors="pt", padding=True, truncation=True).input_ids
|
24 |
|
25 |
"""
|
26 |
For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
|
|
46 |
|
47 |
messages.append({"role": "user", "content": message})
|
48 |
|
49 |
+
# Tokenize the input
|
50 |
inputs = rag_tokenizer(message, return_tensors="pt")
|
|
|
51 |
input_ids = inputs.pop("input_ids")
|
52 |
+
|
53 |
+
# Generate with the RAG model
|
54 |
+
output_ids = rag_model.generate(
|
55 |
+
input_ids=input_ids,
|
56 |
+
context_input_ids=corpus,
|
57 |
+
max_length=max_tokens,
|
58 |
+
do_sample=True,
|
59 |
+
top_p=top_p,
|
60 |
+
top_k=0,
|
61 |
+
num_beams=2,
|
62 |
+
)
|
63 |
retrieved_context = rag_tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
|
64 |
|
65 |
response = ""
|