Chat_with_Meta_llama3_8b

Sleeping

OnlyCheeini committed on Aug 27, 2024

Commit

78f2093

verified ·

1 Parent(s): 301b6fb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,7 +12,10 @@ app = FastAPI()
 # Load your fine-tuned model and tokenizer
 model_name = "OnlyCheeini/greesychat-turbo"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to("cuda")
 class OpenAIRequest(BaseModel):
     model: str
@@ -29,7 +32,7 @@ async def generate_text(request: OpenAIRequest):
     if request.model != model_name:
         raise HTTPException(status_code=400, detail="Model not found")
-    inputs = tokenizer(request.prompt, return_tensors="pt").to("cuda")
     outputs = model.generate(
         **inputs,
         max_length=inputs['input_ids'].shape[1] + request.max_tokens,

 # Load your fine-tuned model and tokenizer
 model_name = "OnlyCheeini/greesychat-turbo"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Check if a GPU is available, otherwise use CPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32).to(device)
 class OpenAIRequest(BaseModel):
     model: str
     if request.model != model_name:
         raise HTTPException(status_code=400, detail="Model not found")
+    inputs = tokenizer(request.prompt, return_tensors="pt").to(device)
     outputs = model.generate(
         **inputs,
         max_length=inputs['input_ids'].shape[1] + request.max_tokens,