|
import gradio as gr |
|
from PIL import Image |
|
import requests |
|
import base64 |
|
import io |
|
import os |
|
# NOTE(review): installing a dependency at runtime is fragile — prefer listing
# "openai" in requirements.txt. Kept as-is to avoid changing how the hosting
# Space bootstraps itself.
os.system("pip install openai")

import openai

# WARNING(review): the API key is blank here — presumably filled in per
# deployment. Never commit a real key; read it from an environment variable /
# Space secret instead. TODO confirm how the key is injected.
openai.api_key = ""

# Route the OpenAI client to DeepInfra's OpenAI-compatible endpoint.
openai.api_base = "https://api.deepinfra.com/v1/openai"
|
def todataimage(file, ext):
    """Encode an image as a base64 ``data:`` URI.

    Args:
        file: a PIL.Image.Image (anything exposing ``save(buffer, format=...)``
            that writes encoded bytes into the buffer).
        ext: PIL format name (e.g. "png", "jpeg"); also used as the MIME
            subtype of the returned URI.

    Returns:
        A ``data:image/<subtype>;base64,<payload>`` string.
    """
    buffered = io.BytesIO()
    file.save(buffered, format=ext)
    # Fix: the MIME subtype now follows ``ext`` instead of being hard-coded
    # to "png" — previously a JPEG payload was labelled image/png.
    subtype = ext.lower()
    if subtype == "jpg":
        subtype = "jpeg"  # MIME uses "jpeg", never "jpg"
    payload = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/{subtype};base64,{payload}"
|
def caption(file, ext):
    """Produce one merged caption for *file*.

    Sends the image (as a data URI) to a public HF Space that runs several
    captioning models, then asks an LLM (via DeepInfra's OpenAI-compatible
    API) to fuse the individual captions into a single one.

    Args:
        file: a PIL.Image.Image to caption.
        ext: PIL format name used when encoding the image (e.g. "png").

    Returns:
        The merged caption text produced by the LLM.

    Raises:
        requests.HTTPError: if the captioning Space responds with an error
            status.
    """
    datimg = todataimage(file, ext)
    # Fan out to several captioning models hosted in one HF Space.
    resp = requests.post(
        "https://russellc-comparing-captioning-models.hf.space/run/predict",
        json={"data": [datimg]},
        timeout=120,  # fail instead of hanging the Gradio worker forever
    )
    # Surface HTTP failures explicitly rather than as a cryptic KeyError below.
    resp.raise_for_status()
    response = resp.json()
    print(response)  # kept for Space logs / debugging
    # Observed quality of the returned captions: 0 = low, 1 & 3 = high,
    # 2 = medium — TODO confirm against the Space's model ordering.
    data = response["data"]
    chat_completion = openai.ChatCompletion.create(
        model="jondurbin/airoboros-l2-70b-gpt4-1.4.1",
        messages=[
            {
                "role": "system",
                # Typos in the original prompt ("varity", "Qualitt",
                # "remeber") fixed — this text is read by the model.
                "content": (
                    "you will be given descriptions of one image from a "
                    "variety of image captioning models with a variety of "
                    "quality, what you need to do is combine them into one "
                    "image caption and make that be your output, no extra "
                    "words like \"here is your output\", remember, don't "
                    "take too much information from low quality, or too "
                    "little from high. do NOT contain ANY text other than "
                    "the description"
                ),
            },
            {
                "role": "user",
                "content": (
                    "High Quality:\n" + data[1] + "\n" + data[3]
                    + "\nMedium Quality:\n" + data[2]
                    + "\nLow Quality:\n" + data[0]
                ),
            },
        ],
    )
    return chat_completion.choices[0].message.content
|
def image_predict(image):
    """Gradio callback: return the merged caption for *image*.

    The image is always encoded as PNG before being sent off.
    """
    encode_format = "png"
    return caption(image, encode_format)
|
# --- Gradio UI wiring ---------------------------------------------------

title = "Generating Captions For Images Using ImCap v1"

# HTML description shown above the interface; includes a "Duplicate Space"
# badge so users can clone the Space and skip the queue.
# (User-facing typos fixed: "varity" -> "variety", "apis" -> "APIs".)
description = """ImCap v1 uses a variety of image caption generators, then plugs it into LLaMA 2 to get a final caption.<br/>
<a href="https://huggingface.co/spaces/SFP/ImCap_v1?duplicate=true"
style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
<img style="margin-bottom: 0em;display: inline;margin-top: -.25em;"
src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
if there is a queue (free CPU works fine as it uses APIs)."""

# PIL image in -> caption text out; flagging disabled via empty options.
iface = gr.Interface(
    image_predict,
    inputs=gr.Image(type="pil"),
    outputs="label",
    flagging_options=[],
    title=title,
    description=description,
)

# Hide the auto-generated API docs page.
iface.launch(show_api=False)