Update app.py
app.py CHANGED
@@ -1,11 +1,10 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
 
 app = FastAPI()
 
-
+from huggingface_hub import InferenceClient
+# Use Hugging Face Inference API (Replace model name if needed)
 # Load the model and the tokenizer
 #model_name = "mistralai/Mistral-7B-Instruct-v0.1" # Mistral 7B model
 #model_name = "HuggingFaceH4/zephyr-3b"
@@ -14,20 +13,21 @@ app = FastAPI()
 #model_name = "microsoft/phi-2"
 model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # You can switch to Phi-2, OpenChat, etc.
 
-
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float32, # Use float32 for CPU compatibility
-).to("cpu") # Force CPU use
+client = InferenceClient(model_name)
 
-#
+# Define request format
 class ChatRequest(BaseModel):
     message: str
 
 @app.post("/chat")
 async def chat(request: ChatRequest):
-
-
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    # Send message to Hugging Face Inference API
+    response = client.text_generation(request.message, max_new_tokens=100)
     return {"response": response}
+
+
+
+
+
+
+
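A quick way to exercise the updated endpoint, assuming the app is served locally (e.g. uvicorn app:app --port 8000) and the requests package is installed; the host, port, and prompt below are illustrative and not part of the commit:

# Hypothetical smoke test for the /chat endpoint defined above.
# Assumes the server is running locally: uvicorn app:app --port 8000
import requests

resp = requests.post(
    "http://localhost:8000/chat",             # illustrative host/port
    json={"message": "Hello! Who are you?"},  # body must match ChatRequest
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["response"])                # text returned by client.text_generation

Note that InferenceClient calls the hosted Hugging Face Inference API rather than running the model locally, so the server needs network access; gated or rate-limited models may also require an access token, which can be passed as InferenceClient(model_name, token=...).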