# Source header (file-viewer metadata, commented out so the module parses):
# Author: Atharva Prashant Pawar
# Version: v1
# Commit: b95f6dc
# raw / history blame / 599 Bytes
from flask import Flask, request, jsonify
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import transformers
app = Flask(__name)
@app.route('/api/generate_response', methods=['POST'])
def generate_response():
    """Generate model responses for a prompt POSTed as JSON.

    Expects a JSON body with keys:
        prompt: the text prompt to feed the model.
        token_limit: maximum number of tokens to generate.

    Returns:
        JSON object {"responses": <model output>} on success, or
        JSON object {"error": <message>} with HTTP 400 when the request
        body is missing or lacks a required field.
    """
    # silent=True returns None (instead of raising) on a non-JSON body,
    # so we can answer with a clean 400 rather than a 500.
    data = request.get_json(silent=True) or {}
    prompt = data.get('prompt')
    token_limit = data.get('token_limit')
    # Validate up front so the model call never sees None arguments.
    if prompt is None or token_limit is None:
        return jsonify({"error": "'prompt' and 'token_limit' are required"}), 400
    # NOTE(review): `mistral_model` is not defined anywhere in the visible
    # file — presumably it comes from the model loading/inference code the
    # original placeholder comment refers to. Confirm it is defined or
    # imported before the server starts, or every request will 500.
    responses = mistral_model(prompt, token_limit)
    return jsonify({"responses": responses})
# Start the Flask development server only when this file is run directly
# (not when it is imported, e.g. by a WSGI server).
if __name__ == "__main__":
app.run()