File size: 7,416 Bytes
60549f8
 
 
 
0233ec6
 
0f11297
 
0e2dc36
0f11297
 
 
 
 
 
 
 
 
60549f8
 
0e2dc36
60549f8
0233ec6
60549f8
 
 
 
 
 
 
 
 
 
 
 
 
 
0233ec6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e2dc36
60549f8
0233ec6
60549f8
 
 
 
0233ec6
f3c77fe
 
 
0233ec6
 
 
f3c77fe
0233ec6
 
 
 
 
 
 
 
60549f8
0233ec6
 
0e2dc36
60549f8
0e2dc36
60549f8
 
0e2dc36
60549f8
0e2dc36
60549f8
dff1a2d
 
 
60549f8
 
 
 
 
 
 
ebdd83b
885b4b1
cc6eaea
8cc52e7
4a6ec9d
 
 
 
 
 
 
 
 
 
 
 
cc6eaea
0e2dc36
 
60549f8
 
 
 
0e2dc36
 
 
 
 
 
 
 
 
 
 
 
 
 
60549f8
0e2dc36
 
60549f8
 
0e2dc36
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import gradio as gr
import requests
import mimetypes
import json, os
import asyncio
import aiohttp
import subprocess

# pip 升級
def upgrade_pip():
    """Try to upgrade pip for the running interpreter and report the outcome.

    This is a best-effort step: any failure to launch or complete the
    upgrade is reported on stdout and never propagated to the caller.
    """
    # Imported locally so this helper does not depend on ``os`` happening to
    # expose ``sys`` as an attribute.
    import sys

    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
        print("pip 升級成功")
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: pip exited with a non-zero status.
        # OSError: the interpreter executable could not be started at all.
        print("pip 升級失敗")

# Run the pip self-upgrade once, when the module is loaded.
upgrade_pip()

# Service configuration comes from the environment; each value is None when
# the corresponding variable is not set.
LLM_URL = os.getenv("LLM_URL")  # base URL that endpoint paths are appended to
LLM_API = os.getenv("LLM_API")  # sent as the bearer token in the Authorization header
# Fixed user identifier passed along with upload and chat requests.
USER_ID = "HuggingFace Space"

def _decode_sse_event(line):
    """Decode one streamed ``data: {...}`` line into a dict, or return None."""
    # partition() splits on the FIRST marker only, so a JSON payload that
    # itself contains the text "data: " is kept intact instead of truncated.
    _, marker, payload = line.partition(b"data: ")
    if not marker:
        return None
    try:
        event = json.loads(payload.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError):
        # Not a decodable JSON payload; skip it.
        return None
    # Only JSON objects carry the "event"/"thought" keys read by the caller.
    return event if isinstance(event, dict) else None


async def send_chat_message(LLM_URL, LLM_API, category, file_id):
    """Send ``category`` as a chat query about an uploaded image and return the reply.

    Posts to ``{LLM_URL}/chat-messages`` in streaming mode, attaching the
    previously uploaded file identified by ``file_id``, and reads the
    response line by line.

    Args:
        LLM_URL: Base URL of the service; ``/chat-messages`` is appended.
        LLM_API: Token sent as ``Authorization: Bearer <token>``.
        category: Text sent as the ``query`` field.
        file_id: Value sent as ``upload_file_id`` of the attached image.

    Returns:
        The stripped ``thought`` of the last ``agent_thought`` event in the
        stream, or a string starting with ``"Error"`` when the endpoint is
        missing, the request fails, or no thought was received.
    """
    payload = {
        "inputs": {},
        "query": category,
        "response_mode": "streaming",
        "conversation_id": "",
        "user": USER_ID,
        "files": [
            {
                "type": "image",
                "transfer_method": "local_file",
                "upload_file_id": file_id
            }
        ]
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{LLM_URL}/chat-messages",
            headers={"Authorization": f"Bearer {LLM_API}"},
            json=payload
        ) as response:
            if response.status == 404:
                return "Error: Endpoint not found (404)"
            if response.status >= 400:
                # Report the real failure instead of falling through to a
                # misleading "no thought" message.
                return f"Error: Request failed with status {response.status}"
            last_thought = None
            async for line in response.content:
                event = _decode_sse_event(line)
                if event is not None and event.get("event") == "agent_thought":
                    # Later thoughts replace earlier ones; only the last is returned.
                    last_thought = event.get("thought")
            return last_thought.strip() if last_thought else "Error: No thought found in the response"

async def upload_file(LLM_URL, LLM_API, file_path, user_id):
    """Upload a local file to ``{LLM_URL}/files/upload`` as multipart form data.

    Args:
        LLM_URL: Base URL of the service; ``/files/upload`` is appended.
        LLM_API: Token sent as ``Authorization: Bearer <token>``.
        file_path: Path of the file to upload; may be ``None`` or empty when
            the caller has no file.
        user_id: Value sent as the ``user`` form field.

    Returns:
        The parsed JSON response body on success, or a string starting with
        ``"Error"`` when the file is missing, the request fails, or the
        response body is not valid JSON.
    """
    if not file_path:
        # os.path functions raise TypeError on None, so reject it up front.
        return "Error: No file provided"
    if not os.path.isfile(file_path):
        # Also covers directories, which would otherwise fail in open().
        return f"Error: File {file_path} not found"
    mime_type, _ = mimetypes.guess_type(file_path)
    with open(file_path, 'rb') as f:
        async with aiohttp.ClientSession() as session:
            form_data = aiohttp.FormData()
            # Send only the base name: the local directory layout is not part
            # of the file's name and should not be sent to the server.
            form_data.add_field(
                'file',
                f,
                filename=os.path.basename(file_path),
                content_type=mime_type,
            )
            form_data.add_field('user', user_id)
            async with session.post(
                f"{LLM_URL}/files/upload",
                headers={"Authorization": f"Bearer {LLM_API}"},
                data=form_data
            ) as response:
                if response.status == 404:
                    return "Error: Endpoint not found (404)"
                if response.status >= 400:
                    return f"Error: Upload failed with status {response.status}"
                response_text = await response.text()
                try:
                    return json.loads(response_text)
                except json.JSONDecodeError:
                    return "Error: Invalid JSON response"

async def handle_input(file_path, category):
    """Upload the selected image and ask the chat endpoint about it.

    Args:
        file_path: Path of the image to analyse; ``None`` or empty when no
            image was provided.
        category: Document category sent as the chat query; ``None`` or
            empty when nothing was selected.

    Returns:
        The text returned by ``send_chat_message``, or a string starting
        with ``"Error"`` describing why the request could not be completed.
    """
    # Validate the inputs before any network call so that a missing image or
    # category yields a readable message rather than an exception.
    if not file_path:
        return "Error: No file provided"
    if not category:
        return "Error: No category selected"

    upload_response = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)
    if isinstance(upload_response, str) and upload_response.startswith("Error"):
        return upload_response
    if not isinstance(upload_response, dict):
        # The upload returns whatever JSON the server sent; only an object
        # can carry the "id" key needed below.
        return "Error: Unexpected upload response format"
    file_id = upload_response.get("id")
    if not file_id:
        return "Error: No file ID returned from upload"
    return await send_chat_message(LLM_URL, LLM_API, category, file_id)

# UI 元件 & 資料
# Each row pairs a demo image path with its document category, in the same
# order as the two inputs the examples widget fills: [image, category].
examples = [
    ["DEMO/boarding-pass.png", "機票"],
    ["DEMO/taxi.jpg", "計程車乘車證明"],
    ["DEMO/etag.jpg", "通行明細 (etag)"],
    ["DEMO/qrcode.jpg", "QRCODE發票"],
    ["DEMO/mthsr.JPG", "超商高鐵車票"],
    ["DEMO/thsr.jpg", "高鐵車票"],
    ["DEMO/mtra.jpg", "超商台鐵車票"],
    ["DEMO/tra.JPG", "台鐵車票"],
]

# Static HTML fragments rendered at the top of the page with gr.HTML, in this
# order: TITLE, SUBTITLE, LINKS. They are displayed verbatim, so any edit to
# the text below changes what users see.

# Main page heading.
TITLE = """<h1>Multimodal Playground 💬 輸入各種單據並選擇種類,解析得到各種關鍵資訊 </h1>"""
# Author and blog links shown as a second-level heading.
SUBTITLE = """<h2><a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D.</a> | <a href='https://blog.twman.org/p/deeplearning101.html' target='_blank'>手把手帶你一起踩AI坑</a><br></h2>"""
# List of external reference links, one per line, separated by <br>.
LINKS = """
<a href='https://github.com/Deep-Learning-101' target='_blank'>Deep Learning 101 Github</a> | <a href='http://deeplearning101.twman.org' target='_blank'>Deep Learning 101</a> | <a href='https://www.facebook.com/groups/525579498272187/' target='_blank'>台灣人工智慧社團 FB</a> | <a href='https://www.youtube.com/c/DeepLearning101' target='_blank'>YouTube</a><br>
<a href='https://blog.twman.org/2025/03/AIAgent.html' target='_blank'>那些 AI Agent 要踩的坑</a>:探討多種 AI 代理人工具的應用經驗與挑戰,分享實用經驗與工具推薦。<br>
<a href='https://blog.twman.org/2024/08/LLM.html' target='_blank'>白話文手把手帶你科普 GenAI</a>:淺顯介紹生成式人工智慧核心概念,強調硬體資源和數據的重要性。<br>
<a href='https://blog.twman.org/2024/09/LLM.html' target='_blank'>大型語言模型直接就打完收工?</a>:回顧 LLM 領域探索歷程,討論硬體升級對 AI 開發的重要性。<br>
<a href='https://blog.twman.org/2024/07/RAG.html' target='_blank'>那些檢索增強生成要踩的坑</a>:探討 RAG 技術應用與挑戰,提供實用經驗分享和工具建議。<br>
<a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大型語言模型要踩的坑</a>:探討多種 LLM 工具的應用與挑戰,強調硬體資源的重要性。<br>
<a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>Large Language Model,LLM</a>:探討 LLM 的發展與應用,強調硬體資源在開發中的關鍵作用。。<br>
<a href='https://blog.twman.org/2024/11/diffusion.html' target='_blank'>ComfyUI + Stable Diffuision</a>:深入探討影像生成與分割技術的應用,強調硬體資源的重要性。<br>
<a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a>:探討 ASR 和 TTS 技術應用中的問題,強調數據質量的重要性。<br>
<a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a>:分享 NLP 領域的實踐經驗,強調數據質量對模型效果的影響。<br>
<a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a>:分享語音處理領域的實務經驗,強調資料品質對模型效果的影響。<br>
<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PPOCRLabel來幫PaddleOCR做OCR的微調和標註</a><br>
<a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
"""

# Gradio Blocks 寫法(全新修正)
with gr.Blocks() as iface:
    # Page header: title, author line, then the reference links.
    gr.HTML(value=TITLE)
    gr.HTML(value=SUBTITLE)
    gr.HTML(value=LINKS)

    # Inputs placed in one row: the image (handed to the callback as a file
    # path) and the document category.
    with gr.Row():
        file_input = gr.Image(type="filepath", label="圖片上傳")
        category = gr.Radio(
            choices=[
                "機票",
                "計程車乘車證明",
                "通行明細 (etag)",
                "QRCODE發票",
                "超商高鐵車票",
                "高鐵車票",
                "超商台鐵車票",
                "台鐵車票",
            ],
            label="Message Category",
        )

    submit_btn = gr.Button(value="解析")
    output_text = gr.Textbox(lines=10, label="解析結果")

    # Clicking the button calls handle_input(image path, category) and shows
    # its return value in the result box.
    submit_btn.click(handle_input, [file_input, category], output_text)

    # Example rows populate the same two inputs.
    gr.Examples(
        examples=examples,
        inputs=[file_input, category],
        label="範例圖片與類型",
    )

# Start the app as soon as the module is executed.
iface.launch()