raghavNCI committed on
Commit f0f712f · 1 Parent(s): 17fbf3d

Mistral now hosted on AWS

Dockerfile CHANGED
@@ -11,6 +11,7 @@ WORKDIR /app
 
 COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
+RUN pip install --no-cache-dir boto3
 
 COPY --chown=user . /app
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
models_initialization/mistral_registry.py CHANGED
@@ -1,51 +1,67 @@
 import os
-import requests
 import json
-from dotenv import load_dotenv
+import boto3
+from botocore.config import Config
+from botocore.exceptions import BotoCoreError, ClientError
 
-load_dotenv()
+# ──────────────────────────────────────────────────────────────
+# Environment variables you need (add them in your HF Space)
+# ──────────────────────────────────────────────────────────────
+# AWS_ACCESS_KEY_ID
+# AWS_SECRET_ACCESS_KEY
+# AWS_REGION              → e.g. "us-east-1"
+# SAGEMAKER_ENDPOINT_NAME → e.g. "mistral-endpoint"
+# ──────────────────────────────────────────────────────────────
 
-HF_TOKEN = os.getenv("HF_TOKEN")
-HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
+AWS_REGION = os.getenv("AWS_REGION", "us-east-1")
+ENDPOINT = os.getenv("SAGEMAKER_ENDPOINT_NAME", "mistral-endpoint")
 
-HEADERS = {
-    "Authorization": f"Bearer {HF_TOKEN}",
-    "Content-Type": "application/json"
-}
+# Optional: configure retries / timeouts
+boto_cfg = Config(
+    retries={"max_attempts": 3, "mode": "standard"},
+    connect_timeout=10,
+    read_timeout=120,
+)
 
-def mistral_generate(prompt: str,
-                     max_new_tokens: int = 128,
-                     temperature: float = 0.7) -> str:
+sm_client = boto3.client("sagemaker-runtime", region_name=AWS_REGION, config=boto_cfg)
+
+
+def mistral_generate(
+    prompt: str,
+    max_new_tokens: int = 128,
+    temperature: float = 0.7,
+) -> str:
     """
-    Call the HF Inference-API for Mistral-7B-Instruct-v0.3.
-    - Automatically waits while the model spins up (`wait_for_model=true`).
-    - Returns the generated text or an empty string on failure.
+    Call the SageMaker endpoint that hosts Mistral-7B.
+    Returns the generated text or an empty string on failure.
     """
     payload = {
         "inputs": prompt,
         "parameters": {
             "max_new_tokens": max_new_tokens,
-            "temperature": temperature
-        }
+            "temperature": temperature,
+        },
     }
 
     try:
-        r = requests.post(
-            HF_API_URL,
-            headers=HEADERS,
-            params={"wait_for_model": "true"},  # ⭐ key change
-            json=payload,                       # use `json=` not `data=`
-            timeout=90                          # give the model time to load
+        # Invoke the endpoint
+        response = sm_client.invoke_endpoint(
+            EndpointName=ENDPOINT,
+            ContentType="application/json",
+            Body=json.dumps(payload).encode("utf-8"),
         )
-        r.raise_for_status()
-        data = r.json()
 
-        # HF returns a list of generated texts for standard text-generation models
-        if isinstance(data, list) and data:
-            return data[0].get("generated_text", "").strip()
+        # SageMaker returns a byte stream → decode & load JSON
+        result = json.loads(response["Body"].read())
+
+        if isinstance(result, list) and result:
+            return result[0].get("generated_text", "").strip()
 
-    except requests.exceptions.RequestException as e:
-        # You might want to log `r.text` as well for quota or auth errors
-        print("❌ Mistral API error:", str(e))
+    except (BotoCoreError, ClientError) as e:
+        # Log SageMaker errors (throttling, auth, etc.)
+        print("❌ SageMaker invocation error:", str(e))
+
+    except Exception as e:
+        print("❌ Unknown error:", str(e))
 
     return ""