# File size: 922 Bytes — revision 26faa32
import transformers

class LLMInferenceServer:
    """Wrap a Hugging Face causal LM and its tokenizer for text generation."""

    def __init__(self, model_name):
        """Load the model and tokenizer identified by *model_name*.

        BUG FIX: the original never loaded a tokenizer, so the prompt text
        could not be encoded at all.
        """
        self.model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

    def generate(self, prompt, max_length=100):
        """Generate a continuation of *prompt* (up to *max_length* tokens).

        Returns the decoded text of the first generated sequence.

        BUG FIX: the original built a ``transformers.InputFeatures`` holding
        only the BOS token and passed an unsupported ``prompt=`` keyword to
        ``model.generate`` — the prompt text was ignored entirely, and the
        raw id tensor it returned was not JSON-serializable by the caller.
        """
        # Encode the prompt into model inputs (PyTorch tensors).
        inputs = self.tokenizer(prompt, return_tensors="pt")
        output_ids = self.model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_length=max_length,
        )
        # Decode the first (and only) sequence back into plain text.
        return self.tokenizer.decode(output_ids[0], skip_special_tokens=True)

if __name__ == '__main__':
    model_name = "google/bigbird-roberta-base"
    server = LLMInferenceServer(model_name)

    from flask import Flask, request, jsonify

    app = Flask(__name__)

    @app.route("/generate", methods=["POST"])
    def generate():
        """Handle POST /generate with JSON body {"prompt": str, "max_length": int?}."""
        # ROBUSTNESS: get_json(silent=True) yields None instead of raising
        # on a missing/invalid JSON body; treat that as an empty payload.
        payload = request.get_json(silent=True) or {}

        prompt = payload.get("prompt")
        # ROBUSTNESS: a missing prompt previously raised KeyError → HTTP 500;
        # answer with a clear 400 instead.
        if prompt is None:
            return jsonify({"error": "'prompt' is required"}), 400

        # ROBUSTNESS: max_length was previously mandatory; default mirrors
        # LLMInferenceServer.generate's own default of 100.
        max_length = payload.get("max_length", 100)

        response = server.generate(prompt, max_length)
        return jsonify({"response": response})

    app.run(host="0.0.0.0", port=8000)