|
import uvicorn |
|
from fastapi import FastAPI, HTTPException |
|
from pydantic import BaseModel |
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
import torch |
|
|
|
|
|
# ASGI application object that the route decorators in this module attach to.
app = FastAPI()

# Checkpoint name handed to both ``from_pretrained`` calls below.
model_name = "Qwen/Qwen2.5-0.5B"

# The tokenizer and model are loaded once, at import time. Any failure is
# printed and then re-raised, so the module never finishes importing without
# both objects available.
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
        attn_implementation="eager",
    )
    print("Model and tokenizer loaded successfully!")
except Exception as exc:
    print(f"Error loading model: {exc}")
    raise
|
|
|
|
|
class TextInput(BaseModel):
    """Request body accepted by the ``/generate`` endpoint."""

    # Text that is tokenized and given to the model as the prompt.
    prompt: str

    # Passed through as ``max_length`` to ``model.generate``; 100 when omitted.
    max_length: int = 100
|
|
|
|
|
@app.post("/generate")
async def generate_text(input: TextInput):
    """Generate a sampled continuation of ``input.prompt``.

    Args:
        input: Request body carrying the prompt and the ``max_length`` budget
            that is forwarded to ``model.generate``.

    Returns:
        A dict ``{"generated_text": <decoded text>}`` holding the decoded first
        (and only) returned sequence with special tokens stripped.

    Raises:
        HTTPException: 400 when the prompt tokenizes to zero tokens or when
            ``max_length`` does not exceed the prompt's token count; 500 for
            any other failure during tokenization, generation, or decoding.
    """
    try:
        # Tokenize and move all resulting tensors to the model's device.
        inputs = tokenizer(input.prompt, return_tensors="pt").to(model.device)
        input_ids = inputs["input_ids"]
        prompt_tokens = input_ids.shape[-1]

        # Reject requests that cannot produce output, as client errors rather
        # than letting them surface as an opaque 500 from inside generate().
        if prompt_tokens == 0:
            raise HTTPException(
                status_code=400,
                detail="prompt must contain at least one token",
            )
        # max_length is a total-length budget that includes the prompt, so it
        # has to be strictly larger than the prompt to leave room for output.
        if input.max_length <= prompt_tokens:
            raise HTTPException(
                status_code=400,
                detail=(
                    f"max_length ({input.max_length}) must be greater than "
                    f"the prompt length in tokens ({prompt_tokens})"
                ),
            )

        # NOTE(review): generate() is a blocking call inside an async handler,
        # so other requests wait while it runs. Left as-is because offloading
        # it to a thread would change the service's concurrency behaviour.
        outputs = model.generate(
            input_ids,
            # Forward the tokenizer's mask instead of letting generate() guess
            # it; ``None`` (if the tokenizer produced none) keeps the default.
            attention_mask=inputs.get("attention_mask"),
            max_length=input.max_length,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            top_k=50,
            top_p=0.95,
        )

        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return {"generated_text": generated_text}
    except HTTPException:
        # Keep deliberate 4xx responses from being re-wrapped as 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
|
@app.get("/")
async def root():
    """Return a fixed status message showing the API is reachable."""
    status = {"message": "Qwen2.5-0.5B API is running!"}
    return status
|
|
|
|
|
if __name__ == "__main__":
    # When executed as a script, serve the app on all interfaces, port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )