Robostar committed (verified)
Commit f433466 · 1 Parent(s): 7b02262

Update app.py

Files changed (1): app.py (+13 -13)
app.py CHANGED
@@ -1,11 +1,10 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
+from huggingface_hub import InferenceClient
 
 app = FastAPI()
 
-
+# Use Hugging Face Inference API (Replace model name if needed)
 # Load the model and tokenizer
 #model_name = "mistralai/Mistral-7B-Instruct-v0.1"  # Mistral 7B model
 #model_name = "HuggingFaceH4/zephyr-3b"
@@ -14,20 +13,21 @@ app = FastAPI()
 #model_name = "microsoft/phi-2"
 model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # You can switch to Phi-2, OpenChat, etc.
 
-# Load tokenizer & model
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float32,  # Use float32 for CPU compatibility
-).to("cpu")  # Force CPU use
+client = InferenceClient(model_name)
 
-# Request format
+# Define request format
 class ChatRequest(BaseModel):
     message: str
 
 @app.post("/chat")
 async def chat(request: ChatRequest):
-    inputs = tokenizer(request.message, return_tensors="pt").to("cpu")  # Send input to CPU
-    output = model.generate(**inputs, max_length=100)
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    # Send message to Hugging Face Inference API
+    response = client.text_generation(request.message, max_new_tokens=100)
     return {"response": response}
+
+
+
+
+
+
+
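In short, the commit swaps local transformers inference (loading TinyLlama on CPU) for remote calls through huggingface_hub's InferenceClient, so the Space only forwards prompts. A minimal sketch of exercising the updated /chat route, assuming the app is served locally (e.g. uvicorn app:app --port 8000); the port and the requests dependency are assumptions, not part of this commit:

import requests

# POST a JSON body matching the ChatRequest model: {"message": ...}
resp = requests.post(
    "http://localhost:8000/chat",
    json={"message": "Hello, who are you?"},
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["response"])  # text generated via the Inference API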
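A side effect of the switch is that torch and transformers can be dropped from the Space's requirements, since generation now runs on Hugging Face's hosted backend. Depending on the model and rate limits, that backend may require an access token; a hedged variant follows, where the HF_TOKEN environment variable is an assumption this commit does not set up:

import os
from huggingface_hub import InferenceClient

# Same client as in app.py, but with an explicit token for authenticated calls.
client = InferenceClient(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    token=os.environ.get("HF_TOKEN"),  # assumed env var; None falls back to anonymous access
)
print(client.text_generation("Hello!", max_new_tokens=50))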