Lemorra committed
Commit 083d486 · 1 Parent(s): 26e3b58

🎉 Pushed initial codebase

.gitignore ADDED
@@ -0,0 +1,16 @@
+ # Ignores Python cache directories:
+ # __pycache__/ - ignores cache directory in root folder
+ # */__pycache__/ - ignores cache directories one level deep
+ # **/__pycache__/ - ignores cache directories at any depth
+ __pycache__/
+ */__pycache__/
+ **/__pycache__/
+ # Python bytecode files:
+ # *.pyc - compiled Python files
+ # *.pyo - optimized Python files
+ # *.pyd - Python DLL files
+ *.py[cod]
+
+ # Python implementation-specific bytecode
+ *$py.class
+
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "src.app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,12 +1,12 @@
  ---
- title: Qwen VL Instruct Backend
- emoji: 😻
- colorFrom: blue
- colorTo: pink
+ title: Qwen2.5 VL 3B Instruct Backend API
+ emoji: 📚
+ colorFrom: red
+ colorTo: yellow
  sdk: docker
  pinned: false
  license: mit
- short_description: QwenVL models; Single/Multi Image support
+ short_description: A Qwen2.5-VL-3B-Instruct backend for testing
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ fastapi
+ uvicorn[standard]
+ transformers
+ accelerate
+ qwen-vl-utils[decord]==0.0.8
+ python-dotenv
+ PyJWT
+ pydantic
+ torch
+ torchvision
+ hf_xet
src/__pycache__/app.cpython-310.pyc ADDED
Binary file (1.19 kB)
 
src/app.py ADDED
@@ -0,0 +1,31 @@
+ from typing import Annotated
+ from fastapi import FastAPI, Depends
+ from .utils.authentication import verify_token
+ from .utils.payload_model import SingleInferencePayload, VideoInferencePayload
+ from .utils.qwen_inference import Qwen2_5
+
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ model_path = os.getenv("MODEL_PATH")
+
+ model_object = Qwen2_5(model_path)
+
+ app = FastAPI()
+
+ @app.get("/")
+ def greet_json():
+     return {
+         "message": "Welcome! The backend API for the Qwen2.5-VL-3B-Instruct model is running.",
+         "status": "active"
+     }
+
+ @app.post("/single_inference")
+ def single_inference(payload: SingleInferencePayload, _token: Annotated[dict, Depends(verify_token)]):
+     return model_object.get_single_inference(payload)
+
+ @app.post("/video_inference")
+ def video_inference(payload: VideoInferencePayload, _token: Annotated[dict, Depends(verify_token)]):
+     return model_object.get_video_inference(payload)
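For reference, a minimal client sketch for the /single_inference route added above. The Space URL and the requests library are assumptions (neither is part of this commit); SECRET_KEY must match the value configured for the Space, and the token is minted with PyJWT in the form verify_token expects.

import base64
import os

import jwt        # PyJWT (same library the backend uses)
import requests   # assumed client-side dependency, not in requirements.txt

API_URL = "https://your-space.hf.space"   # hypothetical Space URL
SECRET_KEY = os.environ["SECRET_KEY"]     # must match the server's SECRET_KEY

# Mint an HS256 bearer token that verify_token() will accept.
token = jwt.encode({"sub": "test-client"}, SECRET_KEY, algorithm="HS256")

# image_path carries the raw base64 of the image; the server adds the data-URL prefix itself.
with open("example.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

response = requests.post(
    f"{API_URL}/single_inference",
    json={"image_path": image_b64, "question": "What is in this image?"},
    headers={"Authorization": f"Bearer {token}"},
)
print(response.json())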
src/utils/__init__.py ADDED
File without changes
src/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (236 Bytes)
 
src/utils/__pycache__/authentication.cpython-310.pyc ADDED
Binary file (1.02 kB)
 
src/utils/__pycache__/payload_model.cpython-310.pyc ADDED
Binary file (728 Bytes)
 
src/utils/__pycache__/qwen_inference.cpython-310.pyc ADDED
Binary file (712 Bytes)
 
src/utils/authentication.py ADDED
@@ -0,0 +1,20 @@
+ from fastapi import HTTPException, Header
+ import jwt
+ from dotenv import load_dotenv
+ import os
+
+ load_dotenv()
+
+ def get_secret_key():
+     return os.getenv("SECRET_KEY")
+
+ async def verify_token(authorization: str = Header(...)):
+     try:
+         token_type, token = authorization.split()
+         if token_type.lower() != "bearer":
+             raise HTTPException(status_code=401, detail="Invalid token type")
+         return jwt.decode(token, get_secret_key(), algorithms=["HS256"])
+     except jwt.ExpiredSignatureError:
+         raise HTTPException(status_code=401, detail="Token has expired")
+     except (jwt.InvalidTokenError, ValueError):  # a malformed header raises ValueError on unpacking
+         raise HTTPException(status_code=401, detail="Invalid token")
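A short sketch of how a caller might mint a token that this verify_token dependency accepts. The create_token helper and the 30-minute lifetime are illustrative assumptions, not part of the commit; including an exp claim is what makes the ExpiredSignatureError branch reachable.

import datetime
import os

import jwt  # PyJWT

def create_token(subject: str, minutes: int = 30) -> str:
    # Hypothetical helper: SECRET_KEY must be the same value get_secret_key() returns on the server.
    payload = {
        "sub": subject,
        "exp": datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(minutes=minutes),
    }
    return jwt.encode(payload, os.getenv("SECRET_KEY"), algorithm="HS256")

# Sent by the client as:  Authorization: Bearer <token>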
src/utils/payload_model.py ADDED
@@ -0,0 +1,9 @@
+ from pydantic import BaseModel
+
+ class SingleInferencePayload(BaseModel):
+     image_path: str  # base64-encoded image data
+     question: str
+
+ class VideoInferencePayload(BaseModel):
+     video_path: list[str]  # base64-encoded video frames, in order
+     question: str
src/utils/qwen_inference.py ADDED
@@ -0,0 +1,134 @@
+ from .payload_model import SingleInferencePayload, VideoInferencePayload
+ from transformers import AutoModelForVision2Seq, AutoTokenizer, AutoProcessor
+ from qwen_vl_utils import process_vision_info
+ from pydantic import BaseModel
+ from typing import Optional
+
+
+ class Qwen2_5(BaseModel):
+     model: Optional[AutoModelForVision2Seq] = None
+     tokenizer: Optional[AutoTokenizer] = None
+     processor: Optional[AutoProcessor] = None
+
+     model_config = {
+         "arbitrary_types_allowed": True,
+         "from_attributes": True
+     }
+
+     def __init__(self, model_path: str):
+         super().__init__()
+         self.model = AutoModelForVision2Seq.from_pretrained(
+             model_path, torch_dtype="auto", device_map="auto"
+         )
+         self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+         self.processor = AutoProcessor.from_pretrained(model_path)
+
+     def prepare_single_inference(self, image: str, question: str):
+         image = f"data:image;base64,{image}"
+         messages = [
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "image",  # image content item (was mistyped as "text")
+                         "image": image,
+                     },
+                     {
+                         "type": "text",
+                         "text": question
+                     },
+                 ],
+             }
+         ]
+         text = self.processor.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+         image_inputs, video_inputs = process_vision_info(messages)
+         inputs = self.processor(
+             text=[text],
+             images=image_inputs,
+             videos=video_inputs,
+             padding=True,
+             return_tensors="pt",
+         )
+         # Follow the model's device (device_map="auto") instead of hard-coding "cuda".
+         inputs = inputs.to(self.model.device)
+
+         return inputs
+
+     def prepare_video_inference(self, video: list[str], question: str):
+         base64_videos = []
+         for frame in video:
+             base64_videos.append(f"data:image;base64,{frame}")
+         messages = [
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "video",
+                         "video": base64_videos,
+                     },
+                     {
+                         "type": "text",
+                         "text": question
+                     },
+                 ],
+             }
+         ]
+         text = self.processor.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+         image_inputs, video_inputs, video_kwargs = process_vision_info(messages, return_video_kwargs=True)
+         inputs = self.processor(
+             text=[text],
+             images=image_inputs,
+             videos=video_inputs,
+             fps=1.0,
+             padding=True,
+             return_tensors="pt",
+             **video_kwargs,
+         )
+         inputs = inputs.to(self.model.device)
+         return inputs
+
+     def get_single_inference(self, payload: SingleInferencePayload):
+         try:
+             processed_inputs = self.prepare_single_inference(payload.image_path, payload.question)
+             generated_ids = self.model.generate(**processed_inputs, max_new_tokens=128)
+             generated_ids_trimmed = [
+                 out_ids[len(in_ids):] for in_ids, out_ids in zip(processed_inputs.input_ids, generated_ids)
+             ]
+             output_text = self.processor.batch_decode(
+                 generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+             )
+             print(f"Model generated text: {output_text}")
+             return {
+                 "message": output_text,
+                 "status": 200
+             }
+         except Exception as e:
+             return {
+                 "message": str(e),
+                 "status": 500
+             }
+
+     def get_video_inference(self, payload: VideoInferencePayload):
+         try:
+             processed_inputs = self.prepare_video_inference(payload.video_path, payload.question)
+             generated_ids = self.model.generate(**processed_inputs, max_new_tokens=128)
+             generated_ids_trimmed = [
+                 out_ids[len(in_ids):] for in_ids, out_ids in zip(processed_inputs.input_ids, generated_ids)
+             ]
+             output_text = self.processor.batch_decode(
+                 generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+             )
+             print(f"Model generated text: {output_text}")
+             return {
+                 "message": output_text,
+                 "status": 200
+             }
+         except Exception as e:
+             return {
+                 "message": str(e),
+                 "status": 500
+             }
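Since prepare_video_inference expects a list of base64-encoded frames (it prepends the data-URL prefix itself), a client has to sample and encode frames before calling /video_inference. A minimal sketch, assuming frames were already extracted to JPEG files (e.g. at roughly 1 frame per second to match the fps=1.0 passed to the processor); encode_frames is a hypothetical helper, not part of this commit.

import base64
from pathlib import Path

def encode_frames(frame_dir: str) -> list[str]:
    # Read each extracted frame and return raw base64 strings,
    # in the order the frames should be shown to the model.
    frames = []
    for path in sorted(Path(frame_dir).glob("*.jpg")):
        frames.append(base64.b64encode(path.read_bytes()).decode("utf-8"))
    return frames

payload = {
    "video_path": encode_frames("frames/"),   # list[str] of base64 frames
    "question": "What happens in this clip?",
}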