Robostar committed on
Commit 7b02262 · verified · 1 Parent(s): 1db6497

Update app.py

Files changed (1)
  1. app.py +11 -7
app.py CHANGED
@@ -5,25 +5,29 @@ import torch
 
 app = FastAPI()
 
+
 # Load the model and the tokenizer
 #model_name = "mistralai/Mistral-7B-Instruct-v0.1"  # Mistral 7B model
 #model_name = "HuggingFaceH4/zephyr-3b"
 #model_name = "serkanarslan/mistral-7b-mini-ft"
-model_name = "microsoft/phi-2"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Choose a smaller model for the free tier
+#model_name = "microsoft/phi-2"
+model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # You can switch to Phi-2, OpenChat, etc.
+
+# Load tokenizer & model
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype=torch.float16,
-    device_map="auto"  # Use the GPU if available
-)
+    torch_dtype=torch.float32,  # Use float32 for CPU compatibility
+).to("cpu")  # Force CPU use
 
-# Define the request format
+# Request format
 class ChatRequest(BaseModel):
     message: str
 
 @app.post("/chat")
 async def chat(request: ChatRequest):
-    inputs = tokenizer(request.message, return_tensors="pt").to("cuda")
+    inputs = tokenizer(request.message, return_tensors="pt").to("cpu")  # Send input to CPU
     output = model.generate(**inputs, max_length=100)
     response = tokenizer.decode(output[0], skip_special_tokens=True)
     return {"response": response}
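
For reference, a minimal sketch of exercising the /chat endpoint this file defines. It assumes the app is served with something like uvicorn app:app --host 0.0.0.0 --port 8000; the module name, host, and port are assumptions, not taken from this commit.

# Hypothetical client for the /chat endpoint defined in app.py.
# Assumes the FastAPI app is reachable at http://localhost:8000 (an assumption).
import requests

resp = requests.post(
    "http://localhost:8000/chat",
    json={"message": "Hello! What can you do?"},  # matches the ChatRequest schema
)
resp.raise_for_status()
print(resp.json()["response"])  # the endpoint returns {"response": ...}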