typhoon-ocr

Sleeping

App Files Files Community

protae5544 committed on Jun 7

Commit

107f2cd

verified ·

1 Parent(s): fe3ba7e

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -48

app.py CHANGED Viewed

@@ -2,22 +2,23 @@ import base64
 from io import BytesIO
 import json
 import os
 from openai import OpenAI
 from dotenv import load_dotenv
 from typhoon_ocr import prepare_ocr_messages
 import gradio as gr
 from PIL import Image
-# Load environment variables from .env
 load_dotenv()
-# Configure the OpenAI client (pointed at the Typhoon OCR API)
 openai = OpenAI(
     base_url=os.getenv("TYPHOON_BASE_URL"),
     api_key=os.getenv("TYPHOON_API_KEY")
 )
-# ตั้งค่า Theme (ใช้ของเดิม)
 theme = gr.themes.Soft(
     primary_hue=gr.themes.Color(
         c50="#f7f7fd",
@@ -40,61 +41,122 @@ theme = gr.themes.Soft(
 OUTPUT_FILE = "ocr_results.txt"
 def save_ocr_result(text):
-    """Append the OCR text to OUTPUT_FILE, followed by two newlines to separate entries."""
     with open(OUTPUT_FILE, "a", encoding="utf-8") as f:
         f.write(text + "\n\n")
     return OUTPUT_FILE
 def clear_output_file():
-    """Empty the results file (called once when the app starts a fresh session)."""
     with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
         f.write("")
-def process_pdf(pdf_or_image_file, task_type, page_number):
     if pdf_or_image_file is None:
         return None, "No file uploaded"  # NOTE(review): two values here, but run_button.click lists three outputs — confirm
     orig_filename = pdf_or_image_file.name
     try:
-        # Build the OCR request with prepare_ocr_messages, as before
-        messages = prepare_ocr_messages(
-            pdf_or_image_path=orig_filename,
-            task_type=task_type,
-            target_image_dim=1800,
-            target_text_length=8000,
-            page_num=page_number if page_number else 1
-        )
-        # Extract the page image from the prepared messages
-        image_url = messages[0]["content"][1]["image_url"]["url"]
-        image_base64 = image_url.replace("data:image/png;base64,", "")
-        image_pil = Image.open(BytesIO(base64.b64decode(image_base64)))
-        # Send the request to the API
-        response = openai.chat.completions.create(
-            model=os.getenv("TYPHOON_OCR_MODEL"),
-            messages=messages,
-            max_tokens=16384,
-            extra_body={
-                "repetition_penalty": 1.2,
-                "temperature": 0.1,
-                "top_p": 0.6,
-            },
-        )
-        text_output = response.choices[0].message.content
-        # Extract natural_text from the JSON reply
-        try:
-            json_data = json.loads(text_output)
-            markdown_out = json_data.get('natural_text', "").replace("<figure>", "").replace("</figure>", "")
-        except Exception as e:
-            markdown_out = f"⚠️ Could not extract `natural_text` from output.\nError: {str(e)}"
-        # Append the result to the shared output file
-        save_ocr_result(markdown_out)
-        return image_pil, markdown_out, gr.File.update(value=OUTPUT_FILE)
     except Exception as e:
         return None, f"Error processing file: {str(e)}", None
@@ -148,9 +210,6 @@ with gr.Blocks(theme=theme) as demo:
                     font-size: 12px;
                 }
                 """
-            # เลือกเพจ
-            page_number = gr.Number(label="📄 Page Number (for PDFs only)", value=1, minimum=1, step=1)
             # ปุ่มรัน
             run_button = gr.Button("🚀 Run")
@@ -167,11 +226,11 @@ with gr.Blocks(theme=theme) as demo:
     # เชื่อมต่อ UI กับฟังก์ชัน
     run_button.click(
         fn=process_pdf,
-        inputs=[pdf_input, task_dropdown, page_number],
         outputs=[image_output, markdown_output, download_button]
     )
-# Called once at startup to clear the old results file
 clear_output_file()
 # รันแอป

 from io import BytesIO
 import json
 import os
+import PyPDF2  # เพิ่มไลบรารีสำหรับอ่าน PDF
 from openai import OpenAI
 from dotenv import load_dotenv
 from typhoon_ocr import prepare_ocr_messages
 import gradio as gr
 from PIL import Image
+# Load environment variables
 load_dotenv()
+# Configure the OpenAI API client
 openai = OpenAI(
     base_url=os.getenv("TYPHOON_BASE_URL"),
     api_key=os.getenv("TYPHOON_API_KEY")
 )
+# ตั้งค่า Theme (เดิม)
 theme = gr.themes.Soft(
     primary_hue=gr.themes.Color(
         c50="#f7f7fd",
 OUTPUT_FILE = "ocr_results.txt"
 def save_ocr_result(text):
     """Append one OCR result to the shared results file.

     The text is written to OUTPUT_FILE followed by two newline characters,
     so consecutive results are separated by an empty line.

     Returns:
         The OUTPUT_FILE path.
     """
     with open(OUTPUT_FILE, mode="a", encoding="utf-8") as results:
         record = text + "\n\n"
         results.write(record)
     return OUTPUT_FILE
 def clear_output_file():
     """Reset OUTPUT_FILE to an empty file, creating it if it does not exist."""
     # Opening in "w" mode truncates the file, so there is nothing to write.
     with open(OUTPUT_FILE, mode="w", encoding="utf-8"):
         pass
def get_pdf_page_count(pdf_path):
    """Return the number of pages in the PDF at ``pdf_path``.

    Args:
        pdf_path: Filesystem path of the PDF to inspect.

    Returns:
        The length of ``PyPDF2.PdfReader(...).pages`` for that file.

    Raises:
        OSError: If the file cannot be opened.

    Errors raised by PyPDF2 while reading the file are not caught here and
    propagate to the caller. There is no fallback value: the previous
    trailing ``return 0`` could never execute, because the ``with`` body
    always returns or raises first.
    """
    with open(pdf_path, "rb") as pdf_file:
        # Count the pages while the file handle is still open.
        return len(PyPDF2.PdfReader(pdf_file).pages)
def _extract_natural_text(text_output):
    """Pull the ``natural_text`` field out of the model's JSON reply.

    ``<figure>`` / ``</figure>`` tags are stripped from the text. If the reply
    cannot be parsed, a warning string describing the error is returned
    instead of raising, so one bad page does not abort a multi-page run.
    """
    try:
        json_data = json.loads(text_output)
        return json_data.get('natural_text', "").replace("<figure>", "").replace("</figure>", "")
    except Exception as e:  # deliberate best-effort: surface the problem as text
        return f"⚠️ Could not extract `natural_text` from output.\nError: {str(e)}"


def _ocr_page(path, task_type, page_num, want_preview):
    """Run OCR on a single page.

    Args:
        path: Path of the PDF or image file.
        task_type: Task type forwarded to ``prepare_ocr_messages``.
        page_num: 1-based page number to process.
        want_preview: When true, also decode the page image from the prompt.

    Returns:
        ``(preview, markdown)`` where ``preview`` is a PIL image or ``None``.
    """
    messages = prepare_ocr_messages(
        pdf_or_image_path=path,
        task_type=task_type,
        target_image_dim=1800,
        target_text_length=8000,
        page_num=page_num,
    )
    preview = None
    if want_preview:
        # The page image is embedded in the prompt as a base64 PNG data URL.
        image_url = messages[0]["content"][1]["image_url"]["url"]
        image_base64 = image_url.replace("data:image/png;base64,", "")
        preview = Image.open(BytesIO(base64.b64decode(image_base64)))
    response = openai.chat.completions.create(
        model=os.getenv("TYPHOON_OCR_MODEL"),
        messages=messages,
        max_tokens=16384,
        extra_body={
            "repetition_penalty": 1.2,
            "temperature": 0.1,
            "top_p": 0.6,
        },
    )
    return preview, _extract_natural_text(response.choices[0].message.content)


def process_pdf(pdf_or_image_file, task_type):
    """OCR an uploaded PDF (every page) or a single image.

    Args:
        pdf_or_image_file: Uploaded file object exposing a ``name`` path
            attribute, or ``None`` when nothing was uploaded.
        task_type: Task type forwarded to ``prepare_ocr_messages``.

    Returns:
        A 3-tuple ``(preview_image, text, file_update)``, one value for each
        of the three outputs wired to the Run button. For PDFs the preview is
        the first page and the text holds every page under a ``[Page N]``
        heading. On any failure the tuple is ``(None, message, None)``.
    """
    if pdf_or_image_file is None:
        # Always return three values: the click handler has three outputs.
        return None, "No file uploaded", None
    orig_filename = pdf_or_image_file.name
    try:
        if orig_filename.lower().endswith(".pdf"):
            total_pages = get_pdf_page_count(orig_filename)
            if total_pages == 0:
                return None, "ไม่สามารถอ่านจำนวนหน้าของ PDF ได้", None
            image_pil = None
            page_sections = []
            for page_num in range(1, total_pages + 1):
                # Only the first page is decoded for the preview pane.
                preview, markdown_out = _ocr_page(
                    orig_filename, task_type, page_num, want_preview=(page_num == 1)
                )
                if preview is not None:
                    image_pil = preview
                page_sections.append(f"[Page {page_num}]\n{markdown_out}\n\n")
            text_out = "".join(page_sections)
        else:
            # Plain image: a single page, which is also the preview.
            image_pil, text_out = _ocr_page(orig_filename, task_type, 1, want_preview=True)
        save_ocr_result(text_out)
        return image_pil, text_out, gr.File.update(value=OUTPUT_FILE)
    except Exception as e:
        # UI boundary: report the failure in the text output instead of crashing.
        return None, f"Error processing file: {str(e)}", None
                     font-size: 12px;
                 }
                 """
             # ปุ่มรัน
             run_button = gr.Button("🚀 Run")
     # เชื่อมต่อ UI กับฟังก์ชัน
     run_button.click(
         fn=process_pdf,
+        inputs=[pdf_input, task_dropdown],
         outputs=[image_output, markdown_output, download_button]
     )
+# Start fresh (clear the old results file)
 clear_output_file()
 # รันแอป