Spaces:
Sleeping
Sleeping
Rename app.py to main.py
Browse files
app.py
DELETED
@@ -1,62 +0,0 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException
|
2 |
-
from pydantic import BaseModel
|
3 |
-
from transformers import pipeline
|
4 |
-
|
5 |
-
# 1. The application's "brain" (mirrors the registry used by the Gradio app):
#    every supported Hub model id mapped to the pipeline task it is served
#    with and a human-readable display name. Entries are grouped by task and
#    the resulting dict keeps that order.
_DISPLAY_NAMES_BY_TASK = (
    (
        "text-generation",
        (
            ("Lyon28/GPT-2-Tinny", "GPT-2 (Tiny)"),
            ("Lyon28/GPT-2", "GPT-2"),
            ("Lyon28/Distil_GPT-2", "DistilGPT-2"),
            ("Lyon28/GPT-Neo", "GPT-Neo"),
            ("Lyon28/Pythia", "Pythia"),
            ("Lyon28/Tinny-Llama", "Tinny-Llama"),
        ),
    ),
    (
        "fill-mask",
        (
            ("Lyon28/Bert-Tinny", "BERT (Tiny)"),
            ("Lyon28/Distilbert-Base-Uncased", "DistilBERT"),
            ("Lyon28/Albert-Base-V2", "Albert v2"),
            ("Lyon28/Electra-Small", "Electra (Small)"),
        ),
    ),
    (
        "text2text-generation",
        (
            ("Lyon28/T5-Small", "T5 (Small)"),
        ),
    ),
)

MODEL_CONFIG = {
    hub_id: {"task": task_name, "display_name": label}
    for task_name, labelled_ids in _DISPLAY_NAMES_BY_TASK
    for hub_id, label in labelled_ids
}

# 2. The model "warehouse": model id -> already-loaded pipeline object.
#    Starts empty and is filled the first time each model is requested.
loaded_pipelines = {}
|
23 |
-
|
24 |
-
# 3. Shape of the JSON body accepted by the inference endpoint.
class InferenceRequest(BaseModel):
    """Request body for an inference call."""

    # Hub model id to run; checked against MODEL_CONFIG by the endpoint.
    model_id: str
    # Input text passed to the model's pipeline.
    prompt: str
|
28 |
-
|
29 |
-
# FastAPI application object that the route decorators attach to.
app = FastAPI()


@app.get("/")
def read_root():
    """Return a static message confirming that the service is up."""
    greeting = "Smart Inference API is running. Use the /inference endpoint."
    return {"message": greeting}
|
34 |
-
|
35 |
-
@app.post("/inference")
def smart_inference(request: InferenceRequest):
    """Run the requested model on the prompt and return its raw output.

    Model ids that are not listed in MODEL_CONFIG are rejected with HTTP 400.
    A pipeline is built the first time a model is requested and kept in
    loaded_pipelines afterwards. Any exception raised while building the
    pipeline or while running it is reported as HTTP 500.
    """
    model_id = request.model_id

    # Guard clause: only models from the registry are served.
    entry = MODEL_CONFIG.get(model_id)
    if entry is None:
        raise HTTPException(status_code=400, detail=f"Model '{model_id}' tidak valid atau tidak didukung.")
    task = entry["task"]

    # Same caching logic as the "warehouse": load only on a cache miss.
    if model_id not in loaded_pipelines:
        print(f"Memuat model: {model_id} untuk task: {task}...")
        try:
            # device=-1 is forwarded to transformers.pipeline unchanged.
            loaded_pipelines[model_id] = pipeline(task, model=model_id, device=-1)
            print("Model berhasil dimuat.")
        except Exception as load_error:
            raise HTTPException(status_code=500, detail=f"Gagal memuat model: {str(load_error)}")

    runner = loaded_pipelines[model_id]

    # Run inference and echo the request details next to the output.
    try:
        output = runner(request.prompt)
        return {
            "model_used": model_id,
            "task": task,
            "input_prompt": request.prompt,
            "output": output,
        }
    except Exception as run_error:
        raise HTTPException(status_code=500, detail=f"Gagal melakukan inference: {str(run_error)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from fastapi import FastAPI, HTTPException
|
3 |
+
from pydantic import BaseModel
|
4 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
5 |
+
from typing import Dict, Any
|
6 |
+
|
7 |
+
# Initialise the API. The metadata is kept in one place and unpacked into the
# FastAPI constructor.
_API_METADATA = {
    "title": "Lyon28 Multi-Model API",
    "description": "API serbaguna untuk 11 model Lyon28",
}
app = FastAPI(**_API_METADATA)
|
12 |
+
|
13 |
+
# --- Models and their tasks ---
# A dictionary keyed by short model name so models are easy to select, and so
# the right pipeline task is known for each one. Every Hub id is the short
# name under the "Lyon28/" namespace, so the mapping is generated from the
# task-grouped names below; the dict keeps this order.
_HUB_NAMESPACE = "Lyon28"
_MODEL_NAMES_BY_TASK = (
    # Generative models (text generation)
    (
        "text-generation",
        (
            "Tinny-Llama",
            "Pythia",
            "GPT-2",
            "GPT-Neo",
            "Distil_GPT-2",
            "GPT-2-Tinny",
        ),
    ),
    # Text-to-text model
    ("text2text-generation", ("T5-Small",)),
    # Fill-mask models
    (
        "fill-mask",
        (
            "Bert-Tinny",
            "Albert-Base-V2",
            "Distilbert-Base-Uncased",
            "Electra-Small",
        ),
    ),
)

MODEL_MAPPING = {
    short_name: {"id": f"{_HUB_NAMESPACE}/{short_name}", "task": task_name}
    for task_name, short_names in _MODEL_NAMES_BY_TASK
    for short_name in short_names
}

# --- Cache of pipelines that have already been loaded ---
# Important: the same model should not be loaded over and over again; keeping
# the loaded pipeline here saves both time and memory.
PIPELINE_CACHE = {}
|
39 |
+
|
40 |
+
def get_pipeline(model_name: str):
    """Return the pipeline for ``model_name``, loading it on first use.

    The name is looked up in ``PIPELINE_CACHE`` first. On a miss, the Hub id
    and task are read from ``MODEL_MAPPING``, the pipeline is built, stored
    in the cache under ``model_name`` and returned.

    Args:
        model_name: A key of ``MODEL_MAPPING``, e.g. ``"Tinny-Llama"``.

    Returns:
        The object produced by ``pipeline(...)`` for this model.

    Raises:
        HTTPException: 404 when ``model_name`` is not in ``MODEL_MAPPING``;
            500 when building the pipeline raises.
    """
    if model_name in PIPELINE_CACHE:
        print(f"Mengambil model '{model_name}' dari cache.")
        return PIPELINE_CACHE[model_name]

    try:
        model_info = MODEL_MAPPING[model_name]
    except KeyError:
        # The KeyError carries no extra information for the client.
        raise HTTPException(status_code=404, detail=f"Model '{model_name}' tidak ditemukan.") from None

    model_id = model_info["id"]
    task = model_info["task"]

    print(f"Memuat model '{model_name}' ({model_id}) untuk tugas '{task}'...")
    try:
        # device_map="auto" is forwarded to transformers.pipeline; per the
        # original note it relies on accelerate to place the model.
        pipe = pipeline(task, model=model_id, device_map="auto")
        PIPELINE_CACHE[model_name] = pipe
        print(f"Model '{model_name}' berhasil dimuat dan disimpan di cache.")
        return pipe
    except Exception as exc:
        # Chain the original error so the server-side traceback keeps the
        # real cause of the failed load.
        raise HTTPException(
            status_code=500,
            detail=f"Gagal memuat model '{model_name}': {str(exc)}",
        ) from exc
|
62 |
+
|
63 |
+
|
64 |
+
# --- Structure of the request body sent by the user ---
class InferenceRequest(BaseModel):
    """Request body for an inference call."""

    # Key into MODEL_MAPPING, e.g. "Tinny-Llama".
    model_name: str
    # Input text handed to the selected pipeline.
    prompt: str
    # Extra keyword arguments for the pipeline call, such as max_length or
    # temperature; defaults to no extra arguments.
    parameters: Dict[str, Any] = {}
|
69 |
+
|
70 |
+
@app.get("/")
def read_root():
    """Report that the API is running and list the available model names."""
    # Iterating the mapping yields its keys in definition order.
    model_names = [*MODEL_MAPPING]
    return {"status": "API is running!", "available_models": model_names}
|
77 |
+
|
78 |
+
@app.post("/invoke")
def invoke_model(request: InferenceRequest):
    """Run inference with the selected model and return its output.

    Fetches (or loads) the pipeline for ``request.model_name`` and calls it
    with ``request.prompt`` plus ``request.parameters`` as keyword
    arguments. The response echoes the model name, prompt and parameters
    next to the pipeline result.

    Raises:
        HTTPException: whatever ``get_pipeline`` raises is passed through
            unchanged; any other exception is reported as a 500.
    """
    try:
        # Fetch the pipeline from the cache, or load it on first use.
        pipe = get_pipeline(request.model_name)

        # NOTE(review): ``parameters`` comes straight from the request body
        # and is forwarded unvalidated as keyword arguments. Consider an
        # allow-list or bounds on values before exposing this publicly.
        result = pipe(request.prompt, **request.parameters)
    except HTTPException:
        # Pass through errors that already carry a status code; a bare
        # ``raise`` keeps the original traceback.
        raise
    except Exception as exc:
        # Anything else that went wrong during inference becomes a 500,
        # chained to the original error for server-side debugging.
        raise HTTPException(
            status_code=500,
            detail=f"Terjadi error saat inferensi: {str(exc)}",
        ) from exc

    return {
        "model_used": request.model_name,
        "prompt": request.prompt,
        "parameters": request.parameters,
        "result": result,
    }
|
101 |
+
|
102 |
+
# Optional warm-up: when the application starts, try to load one popular
# model so the system is already warm.
@app.on_event("startup")
async def startup_event():
    """Pre-load one model at startup so it is already in the cache.

    Any ``Exception`` raised by the warm-up is printed instead of being
    propagated.
    """
    print("API startup: Melakukan warm-up dengan memuat satu model awal...")
    warmup_model = "GPT-2-Tinny"  # picked as a small, fast model
    try:
        get_pipeline(warmup_model)
    except Exception as warmup_error:
        print(f"Gagal melakukan warm-up: {warmup_error}")
|