add do_sample
app.py CHANGED
@@ -38,26 +38,32 @@ def create_knowledge_base(chunks):
 def load_model():
     model_name = "google/gemma-2-2b"  # Hugging Face model ID
     access_token = os.getenv("HF_TOKEN")
+
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token, clean_up_tokenization_spaces=False)
         model = AutoModelForCausalLM.from_pretrained(model_name, token=access_token)
 
-        #
+        # Device setup
         if torch.cuda.is_available():
-            print("Using GPU")
             device = 0
         else:
-            print("Using CPU")
             device = -1
 
-        #
-
-
-
+        # Set `do_sample` to True
+        return pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=150,
+            temperature=0.1,
+            do_sample=True,  # add this setting
+            device=device
+        )
+
     except Exception as e:
         print(f"Error loading model: {e}")
         return None
-
+
 # Handle model responses
 def get_response_from_model(prompt):
     try: