Spaces:

heesuuuuuu
/

dream_interpreter

Running

heesuuuuuu committed on Jul 21

Commit

a869f0d

verified ·

1 Parent(s): 373f7cd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -32,7 +32,7 @@ import gradio as gr  # Gradio 임포트 추가
 print("Vector DB와 임베딩 모델을 불러오는 중입니다...")
 index_path = "proj2_voca"
 model_name = "jhgan/ko-sroberta-multitask"
-model_kwargs = {'device': 'cuda'}
 encode_kwargs = {'normalize_embeddings': True}
 embeddings = HuggingFaceEmbeddings(
     model_name=model_name,
@@ -50,7 +50,7 @@ model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.bfloat16,
     device_map=None  # 자동 배치 비활성화
-).to("cuda:2")  # GPU 번호 명시
 # --- 2. 언어 모델(LLM) 불러오기 ---
 # ... (이전 코드 생략) ...
@@ -58,7 +58,7 @@ pipe = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    device=2,
     max_new_tokens=170,  # 최대 생성 토큰 수 감소
     temperature=0.7,
     repetition_penalty=1.2,  # 반복 방지 패널티 추가

 print("Vector DB와 임베딩 모델을 불러오는 중입니다...")
 index_path = "proj2_voca"
 model_name = "jhgan/ko-sroberta-multitask"
+model_kwargs = {'device': 'cpu'}
 encode_kwargs = {'normalize_embeddings': True}
 embeddings = HuggingFaceEmbeddings(
     model_name=model_name,
     model_id,
     torch_dtype=torch.bfloat16,
     device_map=None  # 자동 배치 비활성화
+).to("cpu")  # place the model on CPU explicitly
 # --- 2. 언어 모델(LLM) 불러오기 ---
 # ... (이전 코드 생략) ...
     "text-generation",
     model=model,
     tokenizer=tokenizer,
+    device=-1,
     max_new_tokens=170,  # 최대 생성 토큰 수 감소
     temperature=0.7,
     repetition_penalty=1.2,  # 반복 방지 패널티 추가