import gradio as gr
from gradio_client import Client


# Client for the public Fuyu-8B captioning demo hosted on Hugging Face Spaces
fuyu_client = Client("https://adept-fuyu-8b-demo.hf.space/")


def get_caption(image_in):
    # Ask the Fuyu Space for a detailed caption of the input image
    fuyu_result = fuyu_client.predict(
        image_in,   # str: filepath for the 'raw_image' Image component
        True,       # bool: 'Enable detailed captioning' Checkbox component
        fn_index=2
    )

    # Truncate at the last period so the caption ends on a complete sentence
    last_period_index = fuyu_result.rfind('.')
    truncated_caption = fuyu_result[:last_period_index + 1]

    print(f"\n—\nIMAGE CAPTION: {truncated_caption}")
    
    return truncated_caption
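
# Quick local sanity check (hypothetical image path; assumes the Fuyu Space
# above is reachable):
#   caption = get_caption("./example.jpg")
#   print(caption)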


def infer(image_in):
    gr.Info("Getting image caption with Fuyu...")
    user_prompt = get_caption(image_in)
    return user_prompt
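
# Placeholder page styles and description; assumed values, adjust to taste.
css = """
#col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
"""
description = "Upload a picture and Fuyu-8B will write a detailed caption you can reuse as an LLM prompt."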

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(f"""
        <h2 style="text-align: center;">LLM Agent from a Picture</h2>
        <p style="text-align: center;">{description}</p>
        """)
        
        with gr.Row():
            with gr.Column():
                image_in = gr.Image(
                    label="Image reference",
                    type="filepath",
                    elem_id="image-in"
                )
                submit_btn = gr.Button("Make LLM system from my pic!")
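            with gr.Column():
                # Output component for the caption returned by infer();
                # a plain Textbox is assumed for `result`.
                result = gr.Textbox(label="Caption / prompt", elem_id="result")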
    submit_btn.click(
        fn=infer,
        inputs=[image_in],
        outputs=[result]
    )
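
# Standard Gradio launch (assumed; queue() enables request queuing on Spaces)
demo.queue().launch()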