halyn committed
Commit 560975f · 1 Parent(s): 9506c20

add do_sample

Files changed (1)
1. app.py (+14 −8)
app.py CHANGED
@@ -38,26 +38,32 @@ def create_knowledge_base(chunks):
 def load_model():
     model_name = "google/gemma-2-2b"  # Hugging Face model ID
     access_token = os.getenv("HF_TOKEN")
+
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token, clean_up_tokenization_spaces=False)
         model = AutoModelForCausalLM.from_pretrained(model_name, token=access_token)
 
-        # Debugging: check GPU/CPU and print it
+        # Device setup
         if torch.cuda.is_available():
-            print("Using GPU")
             device = 0
         else:
-            print("Using CPU")
             device = -1
 
-        # Debugging: print device
-        print(f"Device: {device}")
-
-        return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.1, device=device)
+        # Set `do_sample` to True
+        return pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=150,
+            temperature=0.1,
+            do_sample=True,  # added this setting
+            device=device
+        )
+
     except Exception as e:
         print(f"Error loading model: {e}")
         return None
-
+
 # Handle model response
 def get_response_from_model(prompt):
     try:
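
In transformers, `temperature` only influences generation when sampling is enabled; with the pipeline's default greedy decoding the value is ignored and a warning is logged, which is what adding `do_sample=True` addresses here. A minimal usage sketch of the updated `load_model()` follows; the prompt string is illustrative and not taken from app.py:

    # Minimal usage sketch, assuming load_model() as defined in app.py above.
    generator = load_model()
    if generator is not None:
        # A text-generation pipeline returns a list of dicts; by default each
        # "generated_text" value contains the prompt plus the completion.
        result = generator("Summarize the knowledge base in one sentence.")
        print(result[0]["generated_text"])

With do_sample=True, the low temperature of 0.1 keeps output nearly deterministic while still sampling from the model's distribution instead of taking the argmax token at every step.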