add do_sample
app.py CHANGED
@@ -38,26 +38,32 @@ def create_knowledge_base(chunks):
 def load_model():
     model_name = "google/gemma-2-2b"  # Hugging Face model ID
     access_token = os.getenv("HF_TOKEN")
+
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token, clean_up_tokenization_spaces=False)
         model = AutoModelForCausalLM.from_pretrained(model_name, token=access_token)
 
-        #
+        # Device setup
         if torch.cuda.is_available():
-            print("Using GPU")
             device = 0
         else:
-            print("Using CPU")
             device = -1
 
-        #
-
-
-
+        # Set `do_sample` to True
+        return pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=150,
+            temperature=0.1,
+            do_sample=True,  # add this setting
+            device=device
+        )
+
     except Exception as e:
         print(f"Error loading model: {e}")
         return None
-
+
 # Handle model responses
 def get_response_from_model(prompt):
     try: