Spaces:

DeepLearning101
/

Multimodal-RAG-Agent

Running

App Files Community

DeepLearning101 committed on Apr 13

Commit

88a0eaa

verified ·

1 Parent(s): c7d2968

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -63

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import aiohttp
 LLM_API = os.environ.get("LLM_API")
 LLM_URL = os.environ.get("LLM_URL")
-USER_ID = "HuggingFace Space"  # Placeholder user ID
 async def send_chat_message(LLM_URL, LLM_API, user_input, file_id):
     payload = {
@@ -16,48 +16,32 @@ async def send_chat_message(LLM_URL, LLM_API, user_input, file_id):
         "response_mode": "streaming",
         "conversation_id": "",
         "user": USER_ID,
-        "files": [
-            {
-                "type": "image",
-                "transfer_method": "local_file",
-                "upload_file_id": file_id
-            }
-        ]
     }
-    print("Sending chat message payload:", payload)  # Debug information
     async with aiohttp.ClientSession() as session:
         async with session.post(
             f"{LLM_URL}/chat-messages",
             headers={"Authorization": f"Bearer {LLM_API}"},
             json=payload
         ) as response:
-            print("Request URL:", f"{LLM_URL}/chat-messages")
-            print("Response status code:", response.status)
             if response.status == 404:
                 return "Error: Endpoint not found (404)"
             last_thought = None
             async for line in response.content:
                 if line:
                     try:
-                        # 去掉前面的 "data: " 字串並解析 JSON
-                        line_data = json.loads(line.decode("utf-8").replace("data: ", ""))
-                        print("Line data:", line_data)  # Debug: 輸出每行的資料內容
-                        # 提取含有 `thought` 或 `answer` 的資料
-                        if line_data.get("data", {}).get("outputs", {}).get("answer"):
-                            last_thought = line_data["data"]["outputs"]["answer"]
-                            break  # 找到答案後退出迴圈
-                    except (IndexError, json.JSONDecodeError) as e:
-                        print("Error parsing line:", e)  # Debug: 輸出解析錯誤訊息
                         continue
-            if last_thought:
-                return last_thought.strip()
-            else:
-                return "Error: No thought or answer found in the response"
 async def upload_file(LLM_URL, LLM_API, file_path, user_id):
     if not os.path.exists(file_path):
@@ -68,43 +52,30 @@ async def upload_file(LLM_URL, LLM_API, file_path, user_id):
             form_data = aiohttp.FormData()
             form_data.add_field('file', f, filename=file_path, content_type=mime_type)
             form_data.add_field('user', user_id)
             async with session.post(
                 f"{LLM_URL}/files/upload",
                 headers={"Authorization": f"Bearer {LLM_API}"},
                 data=form_data
             ) as response:
-                print("Upload response status code:", response.status)  # Debug information
                 if response.status == 404:
-                    return "Error: Endpoint not found (404)"
-                response_text = await response.text()
-                print("Raw upload response text:", response_text)  # Debug information
                 try:
-                    response_json = json.loads(response_text)
-                    file_id = response_json.get("id")
-                    if file_id:
-                        return response_json
-                    else:
-                        return "Error: No file ID returned in upload response"
                 except json.JSONDecodeError:
-                    return "Error: Invalid JSON response"
 async def handle_input(file_path, user_input):
     upload_response = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)
-    print("Upload response:", upload_response)  # Debug information
     if isinstance(upload_response, str) and "Error" in upload_response:
         return upload_response
-    file_id = upload_response.get("id")  # Extract file ID from the response
     if not file_id:
-        return "Error: No file ID returned from upload"
-    chat_response = await send_chat_message(LLM_URL, LLM_API, user_input, file_id)
-    print("Chat response:", chat_response)  # Debug information
-    return chat_response
-# 定義界面標題和描述
 TITLE = """<h1>Multimodal RAG Playground 💬 輸入工地照片，生成工地場景及相關法規和缺失描述</h1>"""
 SUBTITLE = """<h2><a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D.</a> | <a href='https://blog.twman.org/p/deeplearning101.html' target='_blank'>手把手帶你一起踩AI坑</a><br></h2>"""
 LINKS = """
@@ -123,12 +94,6 @@ LINKS = """
 <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
 """
-# Define Gradio interface
-file_input = gr.Image(label='圖片上傳', type='filepath')
-user_input = gr.Textbox(label='輸入問題描述', value="分析一下這張工地場景照片", placeholder="請輸入您的問題描述...")
-output_text = gr.Textbox(label="結果輸出", lines=4)
-# # 範例資料
 examples = [
     ['DEMO/DEMO_0004.jpg', '0004-51'],
     ['DEMO/DEMO_0005.jpg', '0005-92'],
@@ -137,16 +102,30 @@ examples = [
     ['DEMO/DEMO_0011.jpg', '0011-108'],
 ]
-with gr.Blocks() as iface:
     gr.HTML(TITLE)
     gr.HTML(SUBTITLE)
     gr.HTML(LINKS)
-    gr.Interface(
         fn=handle_input,
-        inputs=[file_input, user_input],
-        outputs="text",
         examples=examples,
-        flagging_mode="never"  # 更新此處
     )
-iface.launch()

 LLM_API = os.environ.get("LLM_API")
 LLM_URL = os.environ.get("LLM_URL")
+USER_ID = "HuggingFace Space"
 async def send_chat_message(LLM_URL, LLM_API, user_input, file_id):
     payload = {
         "response_mode": "streaming",
         "conversation_id": "",
         "user": USER_ID,
+        "files": [{
+            "type": "image",
+            "transfer_method": "local_file",
+            "upload_file_id": file_id
+        }]
     }
     async with aiohttp.ClientSession() as session:
         async with session.post(
             f"{LLM_URL}/chat-messages",
             headers={"Authorization": f"Bearer {LLM_API}"},
             json=payload
         ) as response:
             if response.status == 404:
                 return "Error: Endpoint not found (404)"
             last_thought = None
             async for line in response.content:
                 if line:
                     try:
+                        data = json.loads(line.decode("utf-8").replace("data: ", ""))
+                        if data.get("data", {}).get("outputs", {}).get("answer"):
+                            last_thought = data["data"]["outputs"]["answer"]
+                            break
+                    except Exception:
                         continue
+            return last_thought.strip() if last_thought else "Error: No answer found."
 async def upload_file(LLM_URL, LLM_API, file_path, user_id):
     if not os.path.exists(file_path):
             form_data = aiohttp.FormData()
             form_data.add_field('file', f, filename=file_path, content_type=mime_type)
             form_data.add_field('user', user_id)
             async with session.post(
                 f"{LLM_URL}/files/upload",
                 headers={"Authorization": f"Bearer {LLM_API}"},
                 data=form_data
             ) as response:
                 if response.status == 404:
+                    return "Error: Upload endpoint not found"
+                text = await response.text()
                 try:
+                    json_resp = json.loads(text)
+                    return json_resp
                 except json.JSONDecodeError:
+                    return "Error: Upload returned invalid JSON"
 async def handle_input(file_path, user_input):
     upload_response = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)
     if isinstance(upload_response, str) and "Error" in upload_response:
         return upload_response
+    file_id = upload_response.get("id")
     if not file_id:
+        return "Error: No file ID from upload"
+    return await send_chat_message(LLM_URL, LLM_API, user_input, file_id)
+# --- Gradio UI 設定 --- 定義界面標題和描述
 TITLE = """<h1>Multimodal RAG Playground 💬 輸入工地照片，生成工地場景及相關法規和缺失描述</h1>"""
 SUBTITLE = """<h2><a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D.</a> | <a href='https://blog.twman.org/p/deeplearning101.html' target='_blank'>手把手帶你一起踩AI坑</a><br></h2>"""
 LINKS = """
 <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>
 """
 examples = [
     ['DEMO/DEMO_0004.jpg', '0004-51'],
     ['DEMO/DEMO_0005.jpg', '0005-92'],
     ['DEMO/DEMO_0011.jpg', '0011-108'],
 ]
+with gr.Blocks() as demo:
     gr.HTML(TITLE)
     gr.HTML(SUBTITLE)
     gr.HTML(LINKS)
+    with gr.Row():
+        image_input = gr.Image(label='📷 上傳照片', type='filepath')
+        text_input = gr.Textbox(label='💬 輸入問題描述', value="分析一下這張工地場景照片")
+    output_box = gr.Textbox(label="📝 回應結果", lines=8)
+    submit_button = gr.Button("🚀 開始分析")
+    submit_button.click(
         fn=handle_input,
+        inputs=[image_input, text_input],
+        outputs=[output_box]
+    )
+    gr.Examples(
         examples=examples,
+        inputs=[image_input, text_input],
+        outputs=[output_box],
+        label="點擊以下範例自動帶入"
     )
+demo.launch()