# Thinking / app.py
# Residue from the web file viewer, kept as comments so the module parses:
#   mike23415's picture | Create app.py | 6f93dce verified | raw | history blame | 1.77 kB
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
app = Flask(__name__)
CORS(app)  # enable CORS handling for the app

# Model configuration
MODEL_NAME = "deepseek-ai/deepseek-r1-6b-chat"
MAX_NEW_TOKENS = 512
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize model and tokenizer.
# Both names are bound up front so a failed load leaves them as None instead
# of leaving `tokenizer` undefined at module level.
tokenizer = None
model = None
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="auto",
        # bfloat16 when CUDA is available, float32 otherwise
        torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
    )
    print("Model loaded successfully!")
except Exception as e:
    # Loading failures are reported but not re-raised: the module still
    # imports, and the endpoints check `model` to report the failure.
    print(f"Model loading failed: {str(e)}")
    tokenizer = None
    model = None
def generate_response(prompt):
    """Generate a sampled completion for ``prompt`` with the loaded model.

    Args:
        prompt: Text handed to the tokenizer.

    Returns:
        The first generated sequence decoded to text, with special tokens
        skipped.

    NOTE(review): ``outputs[0]`` is decoded in full; for causal LMs this
    presumably includes the prompt tokens as well as the completion —
    confirm that callers expect the prompt to be echoed back.
    """
    # The model is loaded with device_map="auto", so follow the device it was
    # actually placed on instead of the module-level DEVICE guess.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        # EOS doubles as the padding token id during generation
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
@app.route('/chat', methods=['POST'])
def chat():
    """Handle ``POST /chat``: run the model on the JSON ``prompt`` field.

    Returns:
        * 200 with ``{"response": <text>}`` on success.
        * 400 with ``{"error": ...}`` when the body is not a JSON object or
          the ``prompt`` field is missing/empty.
        * 500 with ``{"error": ...}`` when the model is not loaded or
          generation raises.
    """
    if not model:
        return jsonify({"error": "Model not loaded"}), 500

    # silent=True gives None for a missing or malformed JSON body instead of
    # raising; the isinstance check also rejects JSON null/lists/strings,
    # which would otherwise crash on `.get` with an AttributeError.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    prompt = payload.get("prompt", "")
    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    try:
        response = generate_response(prompt)
    except Exception as e:
        # Boundary handler: report generation failures to the client as JSON.
        return jsonify({"error": str(e)}), 500
    return jsonify({"response": response})
@app.route('/health', methods=['GET'])
def health_check():
    """Handle ``GET /health``: report whether the model is available.

    Returns:
        JSON ``{"status": "ready"}`` when ``model`` is truthy, otherwise
        ``{"status": "unavailable"}``.
    """
    if model:
        state = "ready"
    else:
        state = "unavailable"
    return jsonify({"status": state})
if __name__ == "__main__":
    # Script entry point: start Flask's built-in server on every network
    # interface, port 5000.
    app.run(port=5000, host="0.0.0.0")