Artples committed on
Commit
dd389e3
·
verified ·
1 Parent(s): ff1c198

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -2
app.py CHANGED
@@ -1,3 +1,37 @@
1
- import gradio as gr
 
 
 
2
 
3
- gr.load("models/Artples/L-MChat-Small").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import torch
5
 
6
class UserRequest(BaseModel):
    """Request body for the /generate/ endpoint: the user's chat prompt."""

    # Raw text prompt to feed the language model.
    prompt: str
8
+
9
# FastAPI application exposing the chat model over HTTP.
app = FastAPI()

# Load the model and tokenizer once at import time so every request reuses
# the same in-memory instances (loading per request would be prohibitively slow).
model_name = "Artples/L-MChat-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Make sure the model is on CPU.
device = torch.device("cpu")
model.to(device)
# Inference-only service: switch to eval mode so dropout and other
# training-time layers are disabled (from_pretrained does not guarantee this
# for every architecture; generation should be deterministic given a seed).
model.eval()
19
+
20
@app.post("/generate/")
async def generate(request: UserRequest):
    """Generate a chat completion for the supplied prompt.

    Args:
        request: JSON body with a single ``prompt`` string.

    Returns:
        ``{"response": <generated text>}``. Note the decoded text includes
        the prompt itself, since the whole output sequence is decoded.

    Raises:
        HTTPException: 500 with the error message if generation fails.
    """
    # NOTE(review): model.generate is a blocking, CPU-heavy call; inside an
    # ``async def`` it stalls the event loop for the whole request. Declaring
    # this endpoint with plain ``def`` would let FastAPI run it in a
    # threadpool — confirm before changing.
    try:
        # Tokenize the prompt and move the tensor to the model's device.
        inputs = tokenizer.encode(request.prompt, return_tensors="pt")
        inputs = inputs.to(device)

        # Generation never needs gradients; inference_mode() skips autograd
        # bookkeeping, saving memory and CPU on every request.
        with torch.inference_mode():
            output = model.generate(inputs, max_length=100, num_return_sequences=1)
        response_text = tokenizer.decode(output[0], skip_special_tokens=True)

        return {"response": response_text}
    except Exception as e:
        # Boundary handler: surface any failure as a 500.
        # NOTE(review): str(e) may leak internal details to clients —
        # consider a generic message plus server-side logging.
        raise HTTPException(status_code=500, detail=str(e))
34
+
35
if __name__ == "__main__":
    # Launch a local development server when the module is executed directly
    # (in deployment an external ASGI server typically imports `app` instead).
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)