import gradio as gr
from gradio_client import Client


# Client for the public Fuyu-8B captioning demo hosted on Hugging Face Spaces
fuyu_client = Client("https://adept-fuyu-8b-demo.hf.space/")


def get_caption(image_in):
    # Ask the Fuyu Space for a detailed caption of the input image
    fuyu_result = fuyu_client.predict(
        image_in,   # str: filepath for the 'raw_image' Image component
        True,       # bool: 'Enable detailed captioning' Checkbox component
        fn_index=2
    )

    # Truncate at the last period so the caption ends on a complete sentence
    last_period_index = fuyu_result.rfind('.')
    truncated_caption = fuyu_result[:last_period_index + 1]

    print(f"\n—\nIMAGE CAPTION: {truncated_caption}")
    
    return truncated_caption
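
# Quick local sanity check (hypothetical image path; assumes the Fuyu Space
# above is reachable):
#   caption = get_caption("./example.jpg")
#   print(caption)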


def infer(image_in):
    gr.Info("Getting image caption with Fuyu...")
    user_prompt = get_caption(image_in)
    return user_prompt
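
# Placeholder page styles and description; assumed values, adjust to taste.
css = """
#col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
"""
description = "Upload a picture and Fuyu-8B will write a detailed caption you can reuse as an LLM prompt."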

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(f"""
        <h2 style="text-align: center;">LLM Agent from a Picture</h2>
        <p style="text-align: center;">{description}</p>
        """)
        
        with gr.Row():
            with gr.Column():
                image_in = gr.Image(
                    label="Image reference",
                    type="filepath",
                    elem_id="image-in"
                )
                submit_btn = gr.Button("Make LLM system from my pic!")
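            with gr.Column():
                # Output component for the caption returned by infer();
                # a plain Textbox is assumed for `result`.
                result = gr.Textbox(label="Caption / prompt", elem_id="result")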
    submit_btn.click(
        fn=infer,
        inputs=[image_in],
        outputs=[result]
    )
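
# Standard Gradio launch (assumed; queue() enables request queuing on Spaces)
demo.queue().launch()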