Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,13 @@ from accelerate import Accelerator
|
|
9 |
|
10 |
hf_api_key = os.getenv('HF_API_KEY')
|
11 |
model_id = "microsoft/phi-2"
|
|
|
|
|
12 |
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_api_key, trust_remote_code=True)
|
|
|
|
|
|
|
|
|
13 |
model = AutoModelForCausalLM.from_pretrained(
|
14 |
model_id,
|
15 |
token=hf_api_key,
|
@@ -27,14 +33,13 @@ data = data.add_faiss_index("embeddings")
|
|
27 |
|
28 |
def generate(formatted_prompt):
|
29 |
prompt_text = f"{SYS_PROMPT} {formatted_prompt}"
|
30 |
-
# 토크나이징 시 attention_mask도 함께 생성
|
31 |
encoding = tokenizer(prompt_text, return_tensors="pt", padding="max_length", max_length=512, truncation=True)
|
32 |
input_ids = encoding['input_ids'].to(accelerator.device)
|
33 |
attention_mask = encoding['attention_mask'].to(accelerator.device)
|
34 |
|
35 |
outputs = model.generate(
|
36 |
input_ids,
|
37 |
-
attention_mask=attention_mask,
|
38 |
max_new_tokens=1024,
|
39 |
eos_token_id=tokenizer.eos_token_id,
|
40 |
do_sample=True,
|
@@ -43,7 +48,6 @@ def generate(formatted_prompt):
|
|
43 |
)
|
44 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
45 |
|
46 |
-
|
47 |
def search(query: str, k: int = 3):
|
48 |
embedded_query = ST.encode(query)
|
49 |
scores, retrieved_examples = data.get_nearest_examples("embeddings", embedded_query, k=k)
|
|
|
9 |
|
10 |
hf_api_key = os.getenv('HF_API_KEY')
|
11 |
model_id = "microsoft/phi-2"
|
12 |
+
|
13 |
+
# 토크나이저 및 모델 설정
|
14 |
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_api_key, trust_remote_code=True)
|
15 |
+
# 패딩 토큰 설정
|
16 |
+
if tokenizer.pad_token is None:
|
17 |
+
tokenizer.pad_token = tokenizer.eos_token
|
18 |
+
|
19 |
model = AutoModelForCausalLM.from_pretrained(
|
20 |
model_id,
|
21 |
token=hf_api_key,
|
|
|
33 |
|
34 |
def generate(formatted_prompt):
|
35 |
prompt_text = f"{SYS_PROMPT} {formatted_prompt}"
|
|
|
36 |
encoding = tokenizer(prompt_text, return_tensors="pt", padding="max_length", max_length=512, truncation=True)
|
37 |
input_ids = encoding['input_ids'].to(accelerator.device)
|
38 |
attention_mask = encoding['attention_mask'].to(accelerator.device)
|
39 |
|
40 |
outputs = model.generate(
|
41 |
input_ids,
|
42 |
+
attention_mask=attention_mask,
|
43 |
max_new_tokens=1024,
|
44 |
eos_token_id=tokenizer.eos_token_id,
|
45 |
do_sample=True,
|
|
|
48 |
)
|
49 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
50 |
|
|
|
51 |
def search(query: str, k: int = 3):
|
52 |
embedded_query = ST.encode(query)
|
53 |
scores, retrieved_examples = data.get_nearest_examples("embeddings", embedded_query, k=k)
|