From Text to Pose to Image: Improving Diffusion Model Control and Quality
Paper
•
2411.12872
•
Published
Superintelligence Alignment
from loadimg import load_img
from huggingface_hub import InferenceClient
# Load the input image (a local image works too) and encode it as base64 so it
# can be embedded directly in the request.
# NOTE(review): `imgPath_url_pillow_or_numpy` is a placeholder that must be
# defined before this line — presumably a path, URL, PIL image or NumPy array,
# judging by its name; confirm against the loadimg documentation.
my_b64_img = load_img(imgPath_url_pillow_or_numpy, output_type="base64")

# NOTE(review): placeholder token — substitute a real key at runtime and avoid
# committing it to source control.
client = InferenceClient(api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")

# A single user turn carrying both the text instruction and the image.
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Describe this image in one sentence.",
            },
            {
                "type": "image_url",
                "image_url": {
                    # base64 allows using images without uploading them to the web
                    "url": my_b64_img,
                },
            },
        ],
    }
]

# Request a streamed completion so the text can be printed as it arrives.
stream = client.chat.completions.create(
    model="meta-llama/Llama-3.2-11B-Vision-Instruct",
    messages=messages,
    max_tokens=500,
    stream=True,
)

for chunk in stream:
    # A streamed chunk may carry no choices, or a delta without text; skip
    # those so the literal string "None" is never printed and indexing an
    # empty list cannot raise IndexError.
    if not chunk.choices:
        continue
    piece = chunk.choices[0].delta.content
    if piece:
        print(piece, end="")
# Maps each category name to its integer class id. Ids follow the position of
# the name in the tuple below, starting at 0.
label2id = {
    name: index
    for index, name in enumerate(
        (
            "Bénéfices professionnels",
            "Contrôle et contentieux",
            "Dispositifs transversaux",
            "Fiscalité des entreprises",
            "Patrimoine et enregistrement",
            "Revenus particuliers",
            "Revenus patrimoniaux",
            "Taxes sur la consommation",
        )
    )
}
# Maps each integer class id back to its category name. Ids are assigned by
# position in the tuple below, starting at 0.
id2label = dict(
    enumerate(
        (
            "Bénéfices professionnels",
            "Contrôle et contentieux",
            "Dispositifs transversaux",
            "Fiscalité des entreprises",
            "Patrimoine et enregistrement",
            "Revenus particuliers",
            "Revenus patrimoniaux",
            "Taxes sur la consommation",
        )
    )
)