from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient

app = FastAPI()

# Use the Hugging Face Inference API (replace the model name if needed).
# Load the model and the tokenizer
# model_name = "mistralai/Mistral-7B-Instruct-v0.1"  # Mistral 7B model
# model_name = "HuggingFaceH4/zephyr-3b"
# model_name = "serkanarslan/mistral-7b-mini-ft"
# A smaller model is a better fit for the free tier:
# model_name = "microsoft/phi-2"
# model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # You can switch to Phi-2, OpenChat, etc.


# Point the client at the hosted model. InferenceClient accepts either a
# model id or a full Inference API URL; the model id is the simpler form
# and avoids repeating the model in every call.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
client = InferenceClient(model=MODEL_ID)
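
# A minimal sketch for authenticated calls, in case anonymous free-tier
# requests are rate-limited or rejected. HF_TOKEN is an assumption about
# where the token is stored, not something this app requires:
#
#   import os
#   client = InferenceClient(model=MODEL_ID, token=os.environ["HF_TOKEN"])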

# Define request format
class ChatRequest(BaseModel):
    message: str

@app.post("/chat")
async def chat(request: ChatRequest):
    # Forward the user message to the hosted model and return the generated text.
    response = client.text_generation(
        request.message,
        max_new_tokens=100,
    )
    return {"response": response}
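

# A minimal way to run and exercise the service locally, assuming this file is
# named main.py and uvicorn is installed (the port and payload below are
# illustrative, not part of the app):
#
#   uvicorn main:app --reload
#   curl -X POST http://127.0.0.1:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello, who are you?"}'

if __name__ == "__main__":
    # Optional: allow `python main.py` as an alternative to the uvicorn CLI.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)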