"""Model hosted on Hugging face.

Based on: https://huggingface.co/docs/hub/spaces-sdks-docker-first-demo
"""

from fastapi import FastAPI, Request

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import T5Tokenizer, T5ForConditionalGeneration

# import gpt4free
# from gpt4free import Provider, forefront


token_size_limit = None

# FROM: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+Thomas%21+How+are+you%3F

# LAST USED
tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill")

# tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-1B-distill")
# model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-1B-distill")
# token_size_limit = 128

# The T5 models accept "any" sequence length, but memory usage is O(L^2).
# tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
# model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
# tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
# model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
# tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
# model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
# Note: the BlenderBot checkpoints above appear to be limited to 128 positions
# (hence the commented-out limit of 128), so 512 is only safe for the T5 models.
token_size_limit = 512  # input is truncated to the most recent tokens in Reply() below

# Too large for 16GB
# tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
# model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl")


app = FastAPI()


# Request body: { "msg": str, "temperature": float (default 0.9), "max_length": int (default 100) }
@app.post('/reply')
async def Reply(req: Request):
    request = await req.json()
    msg = request.get('msg')
    print(f'MSG: {msg}')

    # Hugging face
    input_ids = tokenizer(msg, return_tensors='pt').input_ids  # .to('cuda')
    output = model.generate(
        input_ids[:, -token_size_limit:],  # keep only the most recent tokens
        do_sample=True,
        temperature=request.get('temperature', 0.9),
        max_length=request.get('max_length', 100),
    )
    reply = tokenizer.batch_decode(output, skip_special_tokens=True)[0]  # strip <s>/</s> markers

    # gpt4free (using the Theb provider), kept for reference; it doesn't really work.
    # reply = gpt4free.Completion.create(Provider.Theb, prompt=msg)

    print(f'REPLY: {reply}')
    return {'reply': reply}
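
# Example client call (a sketch; assumes the `requests` package is installed
# and that the server listens on localhost:7860, matching the launch command
# above; both are assumptions):
#
#   import requests
#   r = requests.post('http://localhost:7860/reply',
#                     json={'msg': 'Hey! How are you?', 'temperature': 0.9, 'max_length': 100})
#   print(r.json()['reply'])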


@app.get("/")
def read_root():
    return {"Hello": "World!"}