danishjameel003 committed on
Commit
efe698b
·
verified ·
1 Parent(s): 74d69bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -23,7 +23,7 @@ def load_pipeline():
23
  tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", trust_remote_code=True)
24
  model = AutoModelForCausalLM.from_pretrained(
25
  model_name,
26
- torch_dtype=torch.bfloat16, # Use bfloat16 to reduce memory usage
27
  device_map="auto", # Automatically map model to available devices (e.g., GPU if available)
28
  trust_remote_code=True
29
  )
@@ -33,7 +33,7 @@ def load_pipeline():
33
  task="text-generation",
34
  model=model,
35
  tokenizer=tokenizer,
36
- torch_dtype=torch.bfloat16,
37
  device_map="auto",
38
  return_full_text=True # Required for LangChain compatibility
39
  )
 
23
  tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", trust_remote_code=True)
24
  model = AutoModelForCausalLM.from_pretrained(
25
  model_name,
26
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, # Use float16 for GPU, float32 for CPU
27
  device_map="auto", # Automatically map model to available devices (e.g., GPU if available)
28
  trust_remote_code=True
29
  )
 
33
  task="text-generation",
34
  model=model,
35
  tokenizer=tokenizer,
36
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
37
  device_map="auto",
38
  return_full_text=True # Required for LangChain compatibility
39
  )