"""Model hosted on Hugging face.
Based on: https://huggingface.co/docs/hub/spaces-sdks-docker-first-demo
"""
from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import T5Tokenizer, T5ForConditionalGeneration
# import gpt4free
# from gpt4free import Provider, forefront
token_size_limit = None  # set per model below
# FROM: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+Thomas%21+How+are+you%3F
# LAST USED
tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill")
# tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-1B-distill")
# model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-1B-distill")
# token_size_limit = 128
# T5 models can use "any" sequence length, but memory usage is O(L^2).
# tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
# model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
# tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
# model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
# tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
# model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
token_size_limit = 512
# Too large for 16 GB of memory.
# tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
# model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl")
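# A minimal sanity check for whichever model is active above (a hedged sketch
# to run from a Python shell; not part of the Space itself):
#   ids = tokenizer('Hey! How are you?', return_tensors='pt').input_ids
#   print(tokenizer.batch_decode(model.generate(ids), skip_special_tokens=True))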
app = FastAPI()
# { msg: string, temperature: float, max_length: number }
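# The same schema as a typed request model would look roughly like this
# (an untested sketch; the handler below parses the raw JSON instead):
#   from pydantic import BaseModel
#   class ReplyRequest(BaseModel):
#       msg: str
#       temperature: float = 0.9
#       max_length: int = 100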
@app.post('/reply')
async def Reply(req: Request):
    request = await req.json()
    msg = request.get('msg')
    print(f'MSG: {msg}')
    # Hugging Face generation.
    input_ids = tokenizer(msg, return_tensors='pt').input_ids  # .to('cuda')
    output = model.generate(
        # Truncate from the left so only the most recent tokens are kept.
        input_ids[:, -token_size_limit:],
        do_sample=True,
        temperature=request.get('temperature', 0.9),
        max_length=request.get('max_length', 100),
    )
    # skip_special_tokens drops markers like <s>/</s> from the reply text.
    reply = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    # gpt4free alternative via Provider.Theb (left disabled; it didn't really work):
    # reply = gpt4free.Completion.create(Provider.Theb, prompt=msg)
    print(f'REPLY: {reply}')
    return {'reply': reply}
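# Example call (assuming the Space serves on port 7860, as in the linked tutorial):
#   curl -X POST http://localhost:7860/reply \
#     -H 'Content-Type: application/json' \
#     -d '{"msg": "Hey! How are you?", "temperature": 0.9, "max_length": 100}'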
@app.get("/")
def read_root():
return {"Hello": "World!"}