import io
import base64
import gradio as gr
from PIL import Image
import requests
import openai

# DeepInfra exposes an OpenAI-compatible endpoint; the API key is left blank here.
openai.api_key = ""
openai.api_base = "https://api.deepinfra.com/v1/openai"

def todataimage(image, ext):
    # Encode the image as a base64 data URI so it can be sent as JSON.
    buffer = io.BytesIO()
    Image.fromarray(image).save(buffer, format=ext.upper())
    return "data:image/" + ext + ";base64," + base64.b64encode(buffer.getvalue()).decode("utf-8")

def caption(image, ext):
    # Get captions for the image from several models via the "Comparing Captioning Models" Space.
    response = requests.post(
        "https://russellc-comparing-captioning-models.hf.space/run/predict",
        json={"data": [todataimage(image, ext)]},
    ).json()
    data = response["data"]
    # Merge the individual captions into a single caption with Llama 2 70B Chat.
    system_prompt = (
        "You will be given descriptions of one image from a variety of image captioning models "
        "of varying quality. Combine them into one image caption and make that your only output, "
        "with no extra words like \"here is your output\". Remember not to take too much "
        "information from the low-quality captions, or too little from the high-quality ones. "
        "Do NOT include ANY text other than the description."
    )
    user_prompt = ("High Quality:\n" + data[1] + "\n" + data[3]
                   + "\nMedium Quality:\n" + data[2] + "\nLow Quality:\n" + data[0])
    chat_completion = openai.ChatCompletion.create(
        model="meta-llama/Llama-2-70b-chat-hf",
        messages=[{"role": "system", "content": system_prompt},
                  {"role": "user", "content": user_prompt}],
    )
    return chat_completion.choices[0].message.content

def image_predict(image):
    # Gradio passes the uploaded image as a NumPy array.
    return caption(image, "png")

iface = gr.Interface(fn=image_predict, inputs="image", outputs="label")
iface.launch()