# Visual-Ritual / app.py
# NOTE(review): the lines below were Hugging Face Space page-header residue
# (author "jethrovic", message "Update app.py", commit e80bc76) accidentally
# captured into the source; kept here as a comment so the file stays valid Python.
import streamlit as st
import gradio as gr
from gradio_client import Client
import re
import torch
from transformers import pipeline
fuyu_client = Client("https://adept-fuyu-8b-demo.hf.space/")
def get_caption(image_in):
    """Get a detailed caption for *image_in* from the hosted Fuyu-8b Space.

    Parameters
    ----------
    image_in : str
        Filepath of the image (the value of a ``gr.Image(type="filepath")``).

    Returns
    -------
    str
        The caption, truncated at its last full stop so a trailing partial
        sentence is dropped. If the caption contains no ``'.'`` at all, the
        caption is returned unchanged.
    """
    fuyu_result = fuyu_client.predict(
        image_in,  # str for the 'raw_image' Image component
        True,      # bool for the 'Enable detailed captioning' Checkbox
        fn_index=2,
    )
    # Drop any trailing partial sentence: keep everything up to (and
    # including) the last period.
    last_period_index = fuyu_result.rfind('.')
    if last_period_index == -1:
        # BUG FIX: rfind() returns -1 when no '.' exists, and the original
        # slice fuyu_result[:-1 + 1] == fuyu_result[:0] silently produced "".
        # Return the full caption instead of discarding it.
        truncated_caption = fuyu_result
    else:
        truncated_caption = fuyu_result[:last_period_index + 1]
    print(f"\n—\nIMAGE CAPTION: {truncated_caption}")
    return truncated_caption
# NOTE(review): dead Streamlit prototype below — the app now uses Gradio.
# Kept for reference; remove once the Gradio UI is confirmed working.
#image_1 = st.file_uploader("Drag and drop an image here, or click to select one", type=["png", "jpg", "jpeg"])
# Display the uploaded image
#if image_1 is not None:
#    # Read the image
#    image = Image.open(image_1)
#    # Display the image
#    st.image(image, caption="Uploaded Image", use_column_width=True)
#    get_caption(image)
def infer(image_in):
    """Gradio click handler: caption *image_in* and return the caption.

    Parameters
    ----------
    image_in : str
        Filepath of the uploaded image.

    Returns
    -------
    str
        The (truncated) image caption produced by :func:`get_caption`.
    """
    gr.Info("Getting image caption with Fuyu...")
    user_prompt = get_caption(image_in)
    # BUG FIX: the original called `write(user_prompt)` — an undefined name
    # (leftover Streamlit `st.write`), which raised NameError on every click.
    # Log to stdout instead.
    print(user_prompt)
    return user_prompt
# BUG FIX: `css` and `description` were referenced but never defined anywhere
# above this point, so the module raised NameError at import. Define sensible
# defaults here (override above if custom values are wanted).
css = """
#col-container {max-width: 720px; margin-left: auto; margin-right: auto;}
"""
description = "Upload a picture and get an LLM system prompt built from its caption."

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(f"""
        <h2 style="text-align: center;">LLM Agent from a Picture</h2>
        <p style="text-align: center;">{description}</p>
        """)
        with gr.Row():
            with gr.Column():
                image_in = gr.Image(
                    label="Image reference",
                    type="filepath",
                    elem_id="image-in",
                )
                submit_btn = gr.Button("Make LLM system from my pic !")
            # BUG FIX: `result` was wired as the click output but no such
            # component existed, raising NameError at import.
            result = gr.Textbox(
                label="Result",
                elem_id="result",
            )
    submit_btn.click(
        fn=infer,
        inputs=[image_in],
        outputs=[result],
    )