Moha782 committed on
Commit
daeb152
·
verified ·
1 Parent(s): de394ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -5
app.py CHANGED
@@ -19,8 +19,8 @@ with open(pdf_path, 'rb') as f:
19
  split_pattern = r'\n\n'
20
  doc_chunks = re.split(split_pattern, pdf_text)
21
 
22
- # Create the retriever input
23
- corpus = [{"text": chunk} for chunk in doc_chunks]
24
 
25
  """
26
  For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
@@ -46,11 +46,20 @@ def respond(
46
 
47
  messages.append({"role": "user", "content": message})
48
 
49
- # Tokenize the input and retrieve relevant context from the PDF
50
  inputs = rag_tokenizer(message, return_tensors="pt")
51
- inputs.update({"corpus": corpus})
52
  input_ids = inputs.pop("input_ids")
53
- output_ids = rag_model.generate(**inputs, max_length=max_tokens, temperature=temperature, top_p=top_p, num_beams=2)
 
 
 
 
 
 
 
 
 
 
54
  retrieved_context = rag_tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
55
 
56
  response = ""
 
19
  split_pattern = r'\n\n'
20
  doc_chunks = re.split(split_pattern, pdf_text)
21
 
22
+ # Preprocess the corpus
23
+ corpus = rag_tokenizer(doc_chunks, return_tensors="pt", padding=True, truncation=True).input_ids
24
 
25
  """
26
  For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
46
 
47
  messages.append({"role": "user", "content": message})
48
 
49
+ # Tokenize the input
50
  inputs = rag_tokenizer(message, return_tensors="pt")
 
51
  input_ids = inputs.pop("input_ids")
52
+
53
+ # Generate with the RAG model
54
+ output_ids = rag_model.generate(
55
+ input_ids=input_ids,
56
+ context_input_ids=corpus,
57
+ max_length=max_tokens,
58
+ do_sample=True,
59
+ top_p=top_p,
60
+ top_k=0,
61
+ num_beams=2,
62
+ )
63
  retrieved_context = rag_tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
64
 
65
  response = ""