patrickvonplaten committed
Commit 2cb116a
Parent(s): afe31b3
Update README.md
README.md CHANGED
@@ -80,26 +80,38 @@ You can also make use of a ready-to-go [docker image](https://hub.docker.com/lay
```py
from vllm import LLM
from vllm.sampling_params import SamplingParams
+from vllm.inputs.data import TokensPrompt
+import requests
+from PIL import Image
+from io import BytesIO
+from vllm.multimodal import MultiModalDataBuiltins

+from mistral_common.protocol.instruct.messages import TextChunk, ImageURLChunk

-
+model_name = "mistralai/Pixtral-12B-Base-2409"
sampling_params = SamplingParams(max_tokens=8192)

llm = LLM(model=model_name, tokenizer_mode="mistral")

-
-
-
-
-        "role": "user",
-        "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": image_url}}]
-    },
-]
-
+url = "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/yosemite.png"
+response = requests.get(url)
+image = Image.open(BytesIO(response.content))
+
+prompt = "The image shows a"
+
+user_content = [ImageURLChunk(image_url=url), TextChunk(text=prompt)]

+tokenizer = llm.llm_engine.tokenizer.tokenizer.mistral.instruct_tokenizer
+tokens, _ = tokenizer.encode_user_content(user_content, False)

+prompt = TokensPrompt(
+    prompt_token_ids=tokens, multi_modal_data=MultiModalDataBuiltins(image=[image])
+)
+outputs = llm.generate(prompt, sampling_params=sampling_params)

print(outputs[0].outputs[0].text)
+# ' view of a river flowing through the landscape, with prominent rock formations visible on either side of the river. The scene is captured using the UWA 14-24mm zoom lens, which provides a wide-angle perspective,
+# allowing for a comprehensive view of the surroundings. The photo is credited to Greg Dowdy.
```

### Mistral-inference
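As a quick sanity check on the vLLM snippet added in this commit, the token ids built by `encode_user_content` can be decoded back to text with the same tokenizer object. The sketch below is illustrative only and not part of the README change itself; it assumes the snippet above has already been run and that the `mistral_common` instruct tokenizer exposes a `decode` method.

```py
# Minimal sanity-check sketch (assumes `tokenizer` and `tokens` from the snippet above;
# the `decode` method on the mistral_common instruct tokenizer is an assumption).
print(len(tokens))               # prompt length in tokens, image placeholder tokens included
print(tokenizer.decode(tokens))  # rough text rendering of the encoded prompt; how image
                                 # placeholder tokens are shown depends on the tokenizer version
```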