import gradio as gr
from openai import OpenAI
import os
from PIL import Image
import base64
import io

# Initialize the OpenAI client with the API key (rewritten after the legacy usage raised an error)
api_key = os.getenv("OPENAI_API_KEY")
if api_key is None:
    raise ValueError("OPENAI_API_KEY ํ™˜๊ฒฝ ๋ณ€์ˆ˜๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
client = OpenAI(api_key=api_key)
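
# Example of providing the key before launch (shell, illustrative placeholder value):
#   export OPENAI_API_KEY=sk-...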


def image_to_base64(image):
    # JPEG cannot store an alpha channel, so normalize to RGB first
    # (PNG uploads may arrive as RGBA and would otherwise fail to save).
    image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return img_str
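
# Illustrative use of the helper above (commented out so nothing runs at import
# time); "./sample1.png" is one of the bundled example images listed below:
#   img = Image.open("./sample1.png")
#   data_url = f"data:image/jpeg;base64,{image_to_base64(img)}"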

def extract_and_summarize(image):
    # Convert image to base64
    image_base64 = image_to_base64(image)
    
    # Build the chat messages. Earlier prompt iterations, kept for reference:
    # v1 (Korean draft): "Here is a document as an image. Look at it and summarize the contents
    #     in 3 lines. For documents that must be submitted, be sure to include the submission period and method."
    # v2: "Summarize the image document in 3 lines. If submission is needed, include the deadline, method, and link. Output in Korean."
    # v3 (current): "Summarize the image document in 3 lines. If submission details are included, add the deadline, method, and link. If there is no link, omit the link information. Output in Korean."

    prompt = [
        {
            "role": "system",
            "content": "You are a helpful assistant. Summarize the text content of the document image provided."
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize the image document in 3 lines. If submission details are included, add the deadline, method, and link. If there is no link, omit the link information. Output in Korean."},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}
            ]
        }
    ]
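    # The "image_url" content part above follows the Chat Completions vision
    # input format; embedding a base64 data URL avoids hosting the image anywhere.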
    
    # Call the OpenAI Chat Completions API (gpt-4o-mini) for summarization
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=prompt,
        temperature=0.0,
        max_tokens=300,
    )
    
    # Extract the summary text from the model response
    summary = response.choices[0].message.content
    
    return summary
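
# Quick manual check of the summarizer without the UI (commented-out sketch;
# assumes the sample image exists next to this script and the API key is set):
#   print(extract_and_summarize(Image.open("./sample1.png")))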

# Sample document images offered as clickable examples in the UI
examples = ["./sample1.png", "./sample2.png", "./sample3.png"]


# Define the Gradio interface
iface = gr.Interface(
    fn=extract_and_summarize,
    inputs=gr.Image(type="pil", label="Upload Document Image"),
    outputs=gr.Textbox(label="Summarized Text"),
    title="๊ณต๋ฌธ์„œ ์š”์•ฝ ์ƒ์„ฑ๊ธฐ",  # "Official document summary generator"
    description="๋ฌธ์„œ์˜ ํ™”๋ฉด์„ ์บก์ฒ˜ํ•˜์—ฌ ์—…๋กœ๋“œํ•˜๋ฉด ์š”์•ฝํ•ด์ค๋‹ˆ๋‹ค.",  # "Capture and upload a document screen to get a summary."
    examples=examples
)

# Launch the interface
iface.launch()
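
# Note: launch() serves the app locally by default. For a temporary public URL
# (e.g., when testing outside a hosted Space), Gradio also accepts
# iface.launch(share=True).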