Spaces:

Lyon28
/

AI-Chat-Character

Sleeping

App Files Files Community

Lyon28 committed on Jun 28

Commit

9815afa

verified ·

1 Parent(s): 71c0eec

Rename app.py to main.py

Browse files

Files changed (2) hide show

app.py +0 -62
main.py +110 -0

app.py DELETED Viewed

@@ -1,62 +0,0 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from transformers import pipeline
-# 1. Define the application's "brain" (exactly the same as in the Gradio app).
-# Maps each Hub model id to the pipeline task it is loaded with and a display name.
-MODEL_CONFIG = {
-    # (Copy the complete MODEL_CONFIG from above into here)
-    "Lyon28/GPT-2-Tinny": {"task": "text-generation", "display_name": "GPT-2 (Tiny)"},
-    "Lyon28/GPT-2": {"task": "text-generation", "display_name": "GPT-2"},
-    "Lyon28/Distil_GPT-2": {"task": "text-generation", "display_name": "DistilGPT-2"},
-    "Lyon28/GPT-Neo": {"task": "text-generation", "display_name": "GPT-Neo"},
-    "Lyon28/Pythia": {"task": "text-generation", "display_name": "Pythia"},
-    "Lyon28/Tinny-Llama": {"task": "text-generation", "display_name": "Tinny-Llama"},
-    "Lyon28/Bert-Tinny": {"task": "fill-mask", "display_name": "BERT (Tiny)"},
-    "Lyon28/Distilbert-Base-Uncased": {"task": "fill-mask", "display_name": "DistilBERT"},
-    "Lyon28/Albert-Base-V2": {"task": "fill-mask", "display_name": "Albert v2"},
-    "Lyon28/Electra-Small": {"task": "fill-mask", "display_name": "Electra (Small)"},
-    "Lyon28/T5-Small": {"task": "text2text-generation", "display_name": "T5 (Small)"},
-}
-# 2. Create the model "warehouse" (exactly the same).
-# Filled lazily by the inference endpoint: model id -> loaded pipeline.
-loaded_pipelines = {}
-# 3. Define the format of the request that is accepted
-class InferenceRequest(BaseModel):
-    # Checked against the keys of MODEL_CONFIG by the inference endpoint.
-    model_id: str
-    # Text handed to the selected pipeline.
-    prompt: str
-# Application object that the route decorators below register on.
-app = FastAPI()
-# Root endpoint: returns a fixed message pointing callers at /inference.
-@app.get("/")
-def read_root():
-    return {"message": "Smart Inference API is running. Use the /inference endpoint."}
-# Inference endpoint: validates the requested model id, loads (or reuses) its
-# pipeline, runs it on the prompt and returns the raw pipeline output.
-# Responds with HTTP 400 for an unknown model id and HTTP 500 when loading or
-# inference raises.
-@app.post("/inference")
-def smart_inference(request: InferenceRequest):
-    model_id = request.model_id
-    # Validation: check whether model_id exists in our config
-    if model_id not in MODEL_CONFIG:
-        raise HTTPException(status_code=400, detail=f"Model '{model_id}' tidak valid atau tidak didukung.")
-    task = MODEL_CONFIG[model_id]["task"]
-    # Check the "warehouse" (same caching logic): load only on first use
-    if model_id not in loaded_pipelines:
-        print(f"Memuat model: {model_id} untuk task: {task}...")
-        try:
-            # device=-1 is passed to pipeline(); presumably this selects the CPU — confirm
-            pipe = pipeline(task, model=model_id, device=-1)
-            loaded_pipelines[model_id] = pipe
-            print("Model berhasil dimuat.")
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=f"Gagal memuat model: {str(e)}")
-    pipe = loaded_pipelines[model_id]
-    # Run inference
-    try:
-        result = pipe(request.prompt)
-        return {"model_used": model_id, "task": task, "input_prompt": request.prompt, "output": result}
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Gagal melakukan inference: {str(e)}")

main.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import torch
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+from typing import Dict, Any
+# Initialise the API
+app = FastAPI(
+    title="Lyon28 Multi-Model API",
+    description="API serbaguna untuk 11 model Lyon28"
+)
+# --- List of models and their tasks ---
+# Kept as a dictionary so that entries are easy to look up by short name.
+# It also tells us which pipeline task has to be used for each model.
+MODEL_MAPPING = {
+    # Generative Models (Text Generation)
+    "Tinny-Llama": {"id": "Lyon28/Tinny-Llama", "task": "text-generation"},
+    "Pythia": {"id": "Lyon28/Pythia", "task": "text-generation"},
+    "GPT-2": {"id": "Lyon28/GPT-2", "task": "text-generation"},
+    "GPT-Neo": {"id": "Lyon28/GPT-Neo", "task": "text-generation"},
+    "Distil_GPT-2": {"id": "Lyon28/Distil_GPT-2", "task": "text-generation"},
+    "GPT-2-Tinny": {"id": "Lyon28/GPT-2-Tinny", "task": "text-generation"},
+    # Text-to-Text Model
+    "T5-Small": {"id": "Lyon28/T5-Small", "task": "text2text-generation"},
+    # Fill-Mask Models
+    "Bert-Tinny": {"id": "Lyon28/Bert-Tinny", "task": "fill-mask"},
+    "Albert-Base-V2": {"id": "Lyon28/Albert-Base-V2", "task": "fill-mask"},
+    "Distilbert-Base-Uncased": {"id": "Lyon28/Distilbert-Base-Uncased", "task": "fill-mask"},
+    "Electra-Small": {"id": "Lyon28/Electra-Small", "task": "fill-mask"},
+}
+# --- Cache holding the models that have already been loaded ---
+# This matters: we do not want to load the same model over and over again.
+# It saves time and memory.
+# Keys are the short names used in MODEL_MAPPING; values are the loaded pipelines.
+PIPELINE_CACHE = {}
+def get_pipeline(model_name: str):
+    """Fungsi untuk memuat model dari cache atau dari Hub jika belum ada."""
+    if model_name in PIPELINE_CACHE:
+        print(f"Mengambil model '{model_name}' dari cache.")
+        return PIPELINE_CACHE[model_name]
+    if model_name not in MODEL_MAPPING:
+        raise HTTPException(status_code=404, detail=f"Model '{model_name}' tidak ditemukan.")
+    model_info = MODEL_MAPPING[model_name]
+    model_id = model_info["id"]
+    task = model_info["task"]
+    print(f"Memuat model '{model_name}' ({model_id}) untuk tugas '{task}'...")
+    try:
+        # device_map="auto" menggunakan accelerate untuk menempatkan model secara efisien
+        pipe = pipeline(task, model=model_id, device_map="auto")
+        PIPELINE_CACHE[model_name] = pipe
+        print(f"Model '{model_name}' berhasil dimuat dan disimpan di cache.")
+        return pipe
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Gagal memuat model '{model_name}': {str(e)}")
+# --- Define the structure of the request sent by the user ---
+# Request body of the /invoke endpoint.
+# NOTE(review): under pydantic v2 a field name starting with "model_" may
+# trigger a protected-namespace warning — confirm the installed version.
+class InferenceRequest(BaseModel):
+    model_name: str  # Key name from MODEL_MAPPING, e.g. "Tinny-Llama"
+    # Text passed as the first argument of the pipeline call.
+    prompt: str
+    parameters: Dict[str, Any] = {} # Extra parameters such as max_length, temperature, etc.; forwarded as keyword arguments to the pipeline call
+@app.get("/")
+def read_root():
+    """Endpoint untuk mengecek status API dan daftar model yang tersedia."""
+    return {
+        "status": "API is running!",
+        "available_models": list(MODEL_MAPPING.keys())
+    }
+@app.post("/invoke")
+def invoke_model(request: InferenceRequest):
+    """Endpoint utama untuk melakukan inferensi pada model yang dipilih."""
+    try:
+        # Ambil atau muat pipeline model
+        pipe = get_pipeline(request.model_name)
+        # Gabungkan prompt dengan parameter tambahan
+        # Ini membuat API kita sangat fleksibel!
+        result = pipe(request.prompt, **request.parameters)
+        return {
+            "model_used": request.model_name,
+            "prompt": request.prompt,
+            "parameters": request.parameters,
+            "result": result
+        }
+    except HTTPException as e:
+        # Meneruskan error yang sudah kita definisikan
+        raise e
+    except Exception as e:
+        # Menangkap error lain yang mungkin terjadi saat inferensi
+        raise HTTPException(status_code=500, detail=f"Terjadi error saat inferensi: {str(e)}")
+# Saat aplikasi pertama kali dijalankan, kita bisa coba muat satu model populer
+# untuk menghangatkan sistem (warm-up). Ini opsional.
+@app.on_event("startup")
+async def startup_event():
+    print("API startup: Melakukan warm-up dengan memuat satu model awal...")
+    try:
+        get_pipeline("GPT-2-Tinny") # Pilih model yang kecil dan cepat
+    except Exception as e:
+        print(f"Gagal melakukan warm-up: {e}")