test / app.py
joermd's picture
Update app.py
067af65 verified
raw
history blame
1.77 kB
# app.py
from flask import Flask, render_template, request, jsonify
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from functools import lru_cache
# Flask application object; the @app.route decorators in this file register
# their view functions on it, and the __main__ guard runs it.
app = Flask(__name__)
@lru_cache(maxsize=1)
def load_model():
    """Return the ``(model, tokenizer)`` pair for the AMD-OLMo-1B checkpoint.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    checkpoint is loaded on the first call only; later calls get the very
    same objects back.
    """
    checkpoint = "amd/AMD-OLMo-1B"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # Half-precision weights; device placement is delegated to the library
    # through ``device_map="auto"``.
    load_options = {"torch_dtype": torch.float16, "device_map": "auto"}
    model = AutoModelForCausalLM.from_pretrained(checkpoint, **load_options)
    return model, tokenizer
def generate_response(prompt, model, tokenizer, max_new_tokens=200):
    """Generate the model's continuation of ``prompt``.

    Args:
        prompt: Text to feed to the model.
        model: Causal language model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer matching ``model``; must be callable and expose
            ``decode`` and ``eos_token_id``.
        max_new_tokens: Upper bound on the number of tokens generated in
            addition to the prompt.

    Returns:
        Only the newly generated text, with surrounding whitespace stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Remember how many tokens belong to the prompt so they can be cut off
    # the output by position rather than by string matching.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # Budget counts generated tokens only, so a long prompt no longer
            # eats into (or exhausts) the reply length.
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            # temperature / top_p only take effect when sampling is enabled.
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id,
        )

    # A causal LM returns prompt tokens followed by the continuation; keep
    # only the continuation. Decoding the full sequence and removing the
    # prompt with str.replace breaks when decoding does not round-trip the
    # prompt exactly, and would also delete repeats of it inside the reply.
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
@app.route('/')
def home():
    """Serve the ``index.html`` template for the site root."""
    page = render_template('index.html')
    return page
@app.route('/message', methods=['POST'])
def message():
    """Answer a chat message posted as JSON.

    Expects a JSON object with a ``message`` string. Always replies with a
    JSON object of the form ``{"response": <text>}``: the model's reply on
    success, a "not understood" notice when the message is missing, empty or
    not a string, and an error notice when generation fails.
    """
    # silent=True returns None instead of raising when the body is missing,
    # malformed, or not sent as JSON, so bad input is handled as "no message"
    # rather than surfacing as an internal error.
    data = request.get_json(silent=True)
    user_message = data.get('message') if isinstance(data, dict) else None

    # Reject non-string and blank messages before they reach the tokenizer.
    if not isinstance(user_message, str) or not user_message.strip():
        return jsonify({"response": "عذراً، لم أفهم رسالتك"})

    try:
        model, tokenizer = load_model()
        response = generate_response(user_message, model, tokenizer)
    except Exception as e:
        # Top-level boundary: record the traceback server-side so failures
        # are not silently swallowed.
        app.logger.exception("Failed to generate a response")
        # NOTE(review): the exception text is still echoed to the client to
        # keep the existing response contract; consider a generic message.
        return jsonify({"response": f"عذراً، حدث خطأ: {e}"})

    return jsonify({"response": response})
if __name__ == '__main__':
    import os

    # Flask's debug mode enables the interactive Werkzeug debugger, which
    # lets anyone who can reach the server run arbitrary Python. Keep it off
    # unless explicitly requested through the FLASK_DEBUG environment
    # variable (e.g. FLASK_DEBUG=1 during local development).
    debug_flag = os.environ.get("FLASK_DEBUG", "").strip().lower()
    app.run(debug=debug_flag in {"1", "true", "yes", "on"})