seawolf2357 committed on
Commit 363bbc4 · verified · 1 Parent(s): 000e3df

Update app.py

Files changed (1)
  1. app.py +9 -4
app.py CHANGED
@@ -52,9 +52,11 @@ def format_prompt(prompt, retrieved_documents, k):
     return PROMPT
 
 def generate(formatted_prompt):
-    formatted_prompt = formatted_prompt[:2000] # Limit due to GPU memory constraints
-    messages = [{"role": "system", "content": "You are an assistant..."}, {"role": "user", "content": formatted_prompt}]
-    input_ids = tokenizer(messages, return_tensors="pt", padding=True).input_ids.to(accelerator.device)
+    # Combine the prompts into a single string
+    prompt_text = f"{SYS_PROMPT} {formatted_prompt}"
+    # Tokenize
+    input_ids = tokenizer(prompt_text, return_tensors="pt", padding=True).input_ids.to(accelerator.device)
+    # Generate the response
     outputs = model.generate(
         input_ids,
         max_new_tokens=1024,
@@ -63,7 +65,10 @@ def generate(formatted_prompt):
         temperature=0.6,
         top_p=0.9
     )
-    return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
+    # Decode the response into text
+    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
+    return response
+
 
 def rag_chatbot_interface(prompt: str, k: int = 2):
     scores, retrieved_documents = search(prompt, k)
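
For reference, a minimal, self-contained sketch of the updated generate() flow. The likely motivation for the change is that the old code passed a list of chat-message dicts straight to tokenizer(), which expects a string (or list of strings); the new code joins the system and user prompts into one plain string before tokenizing. The checkpoint, the SYS_PROMPT value, the do_sample flag, and the device handling below are assumptions, since none of them are visible in this diff:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "gpt2"  # placeholder checkpoint; the app's real model is not shown in this diff
SYS_PROMPT = "You are an assistant..."  # assumed from the removed system message

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
tokenizer.pad_token = tokenizer.eos_token  # padding=True requires a pad token
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
device = "cuda" if torch.cuda.is_available() else "cpu"  # stand-in for accelerator.device
model.to(device)

def generate(formatted_prompt: str) -> str:
    # Combine the prompts into a single string
    prompt_text = f"{SYS_PROMPT} {formatted_prompt}"
    # Tokenize
    input_ids = tokenizer(prompt_text, return_tensors="pt", padding=True).input_ids.to(device)
    # Generate the response
    outputs = model.generate(
        input_ids,
        max_new_tokens=64,  # the app uses 1024; kept small here for a quick local run
        do_sample=True,     # assumed: temperature/top_p only take effect when sampling
        temperature=0.6,
        top_p=0.9,
    )
    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return response

print(generate("What is retrieval-augmented generation?"))

One consequence worth noting: joining the prompts as a raw f-string drops the chat format entirely, so for an instruction-tuned checkpoint, building the input with tokenizer.apply_chat_template would normally be the more idiomatic fix.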