from flask import Flask, request, jsonify
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # the device to load the model onto

# transformers has no set_cache_dir() helper; point from_pretrained at the
# desired cache location via its cache_dir argument instead.
cache_dir = "/code/.cache/huggingface"

app = Flask(__name__)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1", cache_dir=cache_dir
)
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1", cache_dir=cache_dir
)
model.to(device)  # move the model to the GPU once at startup, not on every request


@app.route('/recommend', methods=['POST'])
def recommendation():
    content = request.json
    user_degree = content.get('degree')
    user_stream = content.get('stream')
    user_semester = content.get('semester')

    messages = [
        {"role": "user", "content": f"""
You need to act as a recommendation engine for course recommendation based on the details below.
Degree: {user_degree}
Stream: {user_stream}
Current Semester: {user_semester}
Based on the above details, recommend courses that relate to them.
Note: The output should be valid JSON in the following format:
{{"course1": "ABC", "course2": "DEF", "course3": "XYZ", ...}}
"""},
    ]

    # apply_chat_template tokenizes the chat and returns the prompt as a tensor of input ids
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
    model_inputs = encodeds.to(device)

    generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
    decoded = tokenizer.batch_decode(generated_ids)
    return jsonify({"res": decoded[0]})


if __name__ == '__main__':
    app.run(debug=True)
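
# A minimal sketch of how a client might call this endpoint once the server is
# running on Flask's default port 5000. The payload keys match the ones the
# handler reads above; the values here are placeholders, not real data:
#
#   curl -X POST http://127.0.0.1:5000/recommend \
#        -H "Content-Type: application/json" \
#        -d '{"degree": "B.Tech", "stream": "Computer Science", "semester": "4"}'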