# Author: Atharva Prashant Pawar
# Version: v1 (commit b95f6dc)
from flask import Flask, request, jsonify
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import transformers
app = Flask(__name)
@app.route('/api/generate_response', methods=['POST'])
def generate_response():
    """Generate model responses for a prompt POSTed as JSON.

    Expects a JSON body of the form ``{"prompt": str, "token_limit": int}``.
    Returns JSON ``{"responses": ...}`` as produced by ``mistral_model``
    (a model callable defined elsewhere in this project — TODO confirm it
    is loaded before the first request).

    Returns a 400 error if the body is missing or has no ``prompt``.
    """
    # silent=True: return our own 400 below instead of Flask's default
    # error page when the body is absent or not valid JSON.
    data = request.get_json(silent=True) or {}
    prompt = data.get('prompt')
    # NOTE(review): no default is applied here — `token_limit` may be None
    # if the caller omits it; mistral_model is assumed to handle that.
    token_limit = data.get('token_limit')
    if prompt is None:
        return jsonify({"error": "missing required field 'prompt'"}), 400
    responses = mistral_model(prompt, token_limit)
    return jsonify({"responses": responses})
# Script entry point: run the Flask development server.
# (Indentation restored — the body must be nested under the guard.)
if __name__ == "__main__":
    app.run()