Spaces:

nuseAI
/

FastAPI

Sleeping

App Files Community

raghavNCI committed on Jul 7

Commit

e30a3df

1 Parent(s): f0f712f

mistral connection with inference

Browse files

Files changed (1) hide show

models_initialization/mistral_registry.py +32 -38

models_initialization/mistral_registry.py CHANGED Viewed

@@ -1,29 +1,19 @@
 import os
 import json
-import boto3
-from botocore.config import Config
-from botocore.exceptions import BotoCoreError, ClientError
-# ──────────────────────────────────────────────────────────────
-# Environment variables you need (add them in your HF Space)
-# ──────────────────────────────────────────────────────────────
-# AWS_ACCESS_KEY_ID
-# AWS_SECRET_ACCESS_KEY
-# AWS_REGION                → e.g. "us-east-1"
-# SAGEMAKER_ENDPOINT_NAME   → e.g. "mistral-endpoint"
-# ──────────────────────────────────────────────────────────────
-AWS_REGION  = os.getenv("AWS_REGION", "us-east-1")
-ENDPOINT    = os.getenv("SAGEMAKER_ENDPOINT_NAME", "mistral-endpoint")
-# Optional: configure retries / timeouts
-boto_cfg = Config(
-    retries={"max_attempts": 3, "mode": "standard"},
-    connect_timeout=10,
-    read_timeout=120,
-)
-sm_client = boto3.client("sagemaker-runtime", region_name=AWS_REGION, config=boto_cfg)
 def mistral_generate(
@@ -32,8 +22,8 @@ def mistral_generate(
     temperature: float = 0.7,
 ) -> str:
     """
-    Call the SageMaker endpoint that hosts Mistral-7B.
-    Returns the generated text or an empty string on failure.
     """
     payload = {
         "inputs": prompt,
@@ -44,22 +34,26 @@ def mistral_generate(
     }
     try:
-        # Invoke the endpoint
-        response = sm_client.invoke_endpoint(
-            EndpointName=ENDPOINT,
-            ContentType="application/json",
-            Body=json.dumps(payload).encode("utf-8"),
         )
-        # SageMaker returns a byte stream → decode & load JSON
-        result = json.loads(response["Body"].read())
-        if isinstance(result, list) and result:
-            return result[0].get("generated_text", "").strip()
-    except (BotoCoreError, ClientError) as e:
-        # Log SageMaker errors (throttling, auth, etc.)
-        print("❌ SageMaker invocation error:", str(e))
     except Exception as e:
         print("❌ Unknown error:", str(e))

 import os
 import json
+import requests
+from requests.exceptions import RequestException
+HF_ENDPOINT_URL   = os.getenv("HF_ENDPOINT_URL")
+HF_ENDPOINT_TOKEN = os.getenv("HF_ENDPOINT_TOKEN")
+assert HF_ENDPOINT_URL,   "❌ HF_ENDPOINT_URL is not set"
+assert HF_ENDPOINT_TOKEN, "❌ HF_ENDPOINT_TOKEN is not set"
+HEADERS = {
+    "Authorization": f"Bearer {HF_ENDPOINT_TOKEN}",
+    "Content-Type":  "application/json",
+    "Accept":        "application/json",
+}
 def mistral_generate(
     temperature: float = 0.7,
 ) -> str:
     """
+    Call the Hugging Face Inference Endpoint that hosts Mistral-7B.
+    Returns the generated text, or an empty string on failure.
     """
     payload = {
         "inputs": prompt,
     }
     try:
+        r = requests.post(
+            HF_ENDPOINT_URL,
+            headers=HEADERS,
+            json=payload,
+            timeout=90,        # HF spins up cold endpoints too
         )
+        r.raise_for_status()
+        data = r.json()
+        # HF Endpoints usually return a *list* of dicts
+        if isinstance(data, list) and data:
+            return data[0].get("generated_text", "").strip()
+        # Some endpoints return a single dict
+        if isinstance(data, dict) and "generated_text" in data:
+            return data["generated_text"].strip()
+    except RequestException as e:
+        print("❌ HF Endpoint error:", str(e))
+        if e.response is not None:
+            print("Endpoint said:", e.response.text[:300])
     except Exception as e:
         print("❌ Unknown error:", str(e))