sreejith8100 committed
Commit 1a17f60 · verified · 1 Parent(s): 5b37e3c

Delete endpoint_handler.py

Files changed (1)
  1. endpoint_handler.py +0 -95
endpoint_handler.py DELETED
@@ -1,95 +0,0 @@
- import torch
- from PIL import Image
- from transformers import AutoModel, AutoTokenizer
- from io import BytesIO
- import base64
- from huggingface_hub import login
- import os
-
- class EndpointHandler:
-     def __init__(self, model_dir=None):
-         print("[Init] Initializing EndpointHandler...")
-         self.load_model()
-
-     def load_model(self):
-         hf_token = os.getenv("HF_TOKEN")
-         # Swap in a quantized checkpoint here if one becomes available.
-         model_path = "openbmb/MiniCPM-V-4"
-
-         if hf_token:
-             print("[Auth] Logging into Hugging Face Hub with token...")
-             login(token=hf_token)
-
-         print(f"[Model Load] Loading model from: {model_path}")
-         try:
-             self.model = AutoModel.from_pretrained(
-                 model_path,
-                 trust_remote_code=True,
-                 attn_implementation="sdpa",
-                 torch_dtype=torch.bfloat16,
-             ).eval().cuda()
-             print("[Model Load] Model loaded successfully.")
-             self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-             print("[Model Load] Tokenizer loaded successfully.")
-         except Exception as e:
-             print(f"[Model Load Error] {e}")
-             raise RuntimeError(f"Failed to load model: {e}")
-
-     def load_image(self, image_base64):
-         try:
-             print("[Image Load] Decoding base64 image...")
-             image_bytes = base64.b64decode(image_base64)
-             image = Image.open(BytesIO(image_bytes)).convert("RGB")
-             print("[Image Load] Image successfully decoded and converted to RGB.")
-             return image
-         except Exception as e:
-             print(f"[Image Load Error] {e}")
-             raise ValueError(f"Failed to open image from base64 string: {e}")
-
-     def predict(self, request):
-         print(f"[Predict] Received request: {request}")
-
-         inputs = request.get("inputs", {})
-         image_base64 = inputs.get("image")
-         question = inputs.get("question")
-         stream = inputs.get("stream", False)
-
-         if not image_base64 or not question:
-             print("[Predict Error] Missing 'image' or 'question' in the request.")
-             return {"error": "Missing 'image' or 'question' in inputs."}
-
-         try:
-             image = self.load_image(image_base64)
-             msgs = [{"role": "user", "content": [image, question]}]
-
-             print(f"[Predict] Asking model with question: {question}")
-             print("[Predict] Starting chat inference...")
-
-             res = self.model.chat(
-                 image=image,
-                 msgs=msgs,
-                 tokenizer=self.tokenizer,
-                 sampling=True,
-                 stream=stream,
-             )
-
-             if stream:
-                 # Hand off to a dedicated generator: a bare `yield` inside
-                 # predict() would turn the whole method into a generator and
-                 # swallow the dicts returned on the non-streaming paths.
-                 return self._stream_output(res)
-
-             generated_text = res if isinstance(res, str) else "".join(res)
-             print("[Predict] Inference complete.")
-             return {"output": generated_text}
-         except Exception as e:
-             print(f"[Predict Error] {e}")
-             return {"error": str(e)}
-
-     def _stream_output(self, res):
-         for new_text in res:
-             yield {"output": new_text}
-
-     def __call__(self, data):
-         print("[__call__] Invoked handler with data.")
-         return self.predict(data)
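
For context, the deleted handler expected payloads of the form {"inputs": {"image": <base64 string>, "question": <text>, "stream": <bool>}}. A minimal local-invocation sketch of that contract (the cat.jpg path and the question strings are illustrative placeholders, not part of the commit):

import base64

from endpoint_handler import EndpointHandler  # the file removed by this commit

# Base64-encode a local image, as predict() expects under inputs["image"].
with open("cat.jpg", "rb") as f:  # hypothetical test image
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

handler = EndpointHandler()

# Non-streaming call: the handler returns a plain dict.
result = handler({"inputs": {"image": image_b64, "question": "What is in this picture?"}})
print(result.get("output", result))

# Streaming call: the handler returns a generator of partial outputs.
for chunk in handler({"inputs": {"image": image_b64, "question": "Describe the scene.", "stream": True}}):
    print(chunk["output"], end="", flush=True)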
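
If the same handler were deployed as a custom Hugging Face Inference Endpoint, a remote client would POST the identical JSON body over HTTPS. A hedged sketch (the endpoint URL is a placeholder, and streaming is omitted since the generator returned by predict() would additionally need server-side streaming support to reach a remote client):

import base64
import os

import requests

ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"  # placeholder

with open("cat.jpg", "rb") as f:  # hypothetical test image
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

response = requests.post(
    ENDPOINT_URL,
    headers={
        "Authorization": f"Bearer {os.environ['HF_TOKEN']}",
        "Content-Type": "application/json",
    },
    json={"inputs": {"image": image_b64, "question": "What is in this picture?"}},
)
response.raise_for_status()
print(response.json())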