Spaces:

artificialguybr
/

Surya-OCR

Running on T4

App Files Files Community

artificialguybr committed on Mar 4, 2024

Commit

b44d45c

verified ·

1 Parent(s): 358b810

Update app.py

Browse files

Files changed (1) hide show

app.py +140 -60

app.py CHANGED Viewed

@@ -5,104 +5,184 @@ from PIL import Image
 import os
 import tempfile
-# Helper that saves the image into a temporary directory and returns the file path and that directory
 def save_temp_image(img):
     temp_dir = tempfile.mkdtemp()
     img_path = os.path.join(temp_dir, "input_image.png")
     img.save(img_path)
     return img_path, temp_dir
-# Runs OCR through the command line
 def ocr_function_cli(img, lang_name):
     img_path, temp_dir = save_temp_image(img)
-    # Replace 'surya_ocr' with the correct command for your system
     command = f"surya_ocr {img_path} --langs {lang_name} --images --results_dir {temp_dir}"
-    # Run the command
-    subprocess.run(command, shell=True, check=True)
-    # The paths below must be adjusted to match what your command actually writes
-    result_img_path = os.path.join(temp_dir, "image_with_text.png")  # Adjust as needed
-    result_text_path = os.path.join(temp_dir, "results.json")  # Adjust as needed
-    # Load the resulting image
     if os.path.exists(result_img_path):
         result_img = Image.open(result_img_path)
     else:
-        result_img = img  # Fall back to the original image if no processed image is found
-    # Load the resulting text
     if os.path.exists(result_text_path):
-        with open(result_text_path, "r") as file:
             result_text = json.load(file)
-            # Adjust the text extraction to match the format of your JSON
-            text_output = "\n".join([str(page) for page in result_text.values()])
     else:
         text_output = "No text detected"
-    # Cleanup
-    os.remove(img_path)  # Remove the temporary image
-    # optional: remove the temporary directory and its contents, if needed
     return result_img, text_output
-# Runs text line detection through the command line
 def text_line_detection_function_cli(img):
     img_path, temp_dir = save_temp_image(img)
-    # Replace 'surya_detect' with the correct command for your system
     command = f"surya_detect {img_path} --images --results_dir {temp_dir}"
-    # Run the command
-    subprocess.run(command, shell=True, check=True)
-    # The paths below must be adjusted to match what your command actually writes
-    result_img_path = os.path.join(temp_dir, "image_with_lines.png")  # Adjust as needed
-    result_json_path = os.path.join(temp_dir, "results.json")  # Adjust as needed
-    # Load the resulting image
     if os.path.exists(result_img_path):
         result_img = Image.open(result_img_path)
     else:
-        result_img = img  # Fall back to the original image if no processed image is found
-    # Load the JSON results
     if os.path.exists(result_json_path):
-        with open(result_json_path, "r") as file:
             result_json = json.load(file)
     else:
         result_json = {"error": "No detection results found"}
-    # Cleanup
-    os.remove(img_path)  # Remove the temporary image
-    # optional: remove the temporary directory and its contents, if needed
     return result_img, result_json
-# Gradio interface
 with gr.Blocks() as app:
-    gr.Markdown("# Surya OCR e Detecção de Linhas de Texto via CLI")
     with gr.Tab("OCR"):
         with gr.Column():
-            ocr_input_image = gr.Image(label="Imagem de Entrada para OCR", type="pil")
-            ocr_language_selector = gr.Dropdown(label="Selecione o Idioma para OCR", choices=["English", "Portuguese"], value="English")
-            ocr_run_button = gr.Button("Executar OCR")
         with gr.Column():
-            ocr_output_image = gr.Image(label="Imagem de Saída do OCR", type="pil", interactive=False)
-            ocr_text_output = gr.TextArea(label="Texto Reconhecido")
-        ocr_run_button.click(fn=ocr_function_cli, inputs=[ocr_input_image, ocr_language_selector], outputs=[ocr_output_image, ocr_text_output])
-    with gr.Tab("Detecção de Linhas de Texto"):
         with gr.Column():
-            detection_input_image = gr.Image(label="Imagem de Entrada para Detecção", type="pil")
-            detection_run_button = gr.Button("Executar Detecção de Linhas de Texto")
         with gr.Column():
-            detection_output_image = gr.Image(label="Imagem de Saída da Detecção", type="pil", interactive=False)
-            detection_json_output = gr.JSON(label="Saída JSON da Detecção")
-        detection_run_button.click(fn=text_line_detection_function_cli, inputs=detection_input_image, outputs=[detection_output_image, detection_json_output])
 if __name__ == "__main__":
-    app.launch()

 import os
 import tempfile
 def save_temp_image(img):
     """Store ``img`` as ``input_image.png`` inside a freshly created temp directory.

     Args:
         img: Any object exposing ``save(path)``.

     Returns:
         A ``(file_path, directory)`` tuple. Nothing is deleted here, so the
         caller is responsible for removing the directory afterwards.
     """
     scratch_dir = tempfile.mkdtemp()
     target_path = os.path.join(scratch_dir, "input_image.png")
     img.save(target_path)
     return target_path, scratch_dir
 def ocr_function_cli(img, lang_name):
     """Run the ``surya_ocr`` command-line tool on ``img`` and collect its results.

     Args:
         img: Image object exposing ``save(path)``, or ``None`` when no image
             was supplied.
         lang_name: Value forwarded as a single argument to ``--langs``.

     Returns:
         A ``(image, text)`` tuple:
         - ``(None, "No image provided")`` when ``img`` is ``None``;
         - ``(img, "OCR failed")`` when the command cannot be started or
           exits with a non-zero status;
         - otherwise the annotated image (or ``img`` if none was produced)
           and the recognized text (or ``"No text detected"``).
     """
     import shutil

     if img is None:
         # Presumably what the UI passes when Run is clicked without an
         # upload; answer with a message instead of crashing in img.save().
         return None, "No image provided"

     img_path, temp_dir = save_temp_image(img)
     try:
         # An argument list without a shell keeps names that contain spaces
         # (e.g. "Western Frisian") as one argument and avoids shell
         # interpretation of the interpolated values.
         # NOTE(review): confirm that surya_ocr accepts full language names
         # for --langs rather than language codes.
         command = [
             "surya_ocr", img_path,
             "--langs", lang_name,
             "--images",
             "--results_dir", temp_dir,
         ]
         try:
             subprocess.run(command, check=True)
         except (subprocess.CalledProcessError, OSError) as e:
             # Output is not captured (it streams to the console), so
             # e.output would be None; log the exception itself, which
             # carries the command and exit status. OSError covers a
             # missing executable now that no shell is involved.
             print(f"OCR command failed: {e}")
             return img, "OCR failed"

         # NOTE(review): these file names assume the tool writes directly
         # into results_dir - confirm against the installed surya version.
         result_img_path = os.path.join(temp_dir, "image_with_text.png")
         result_text_path = os.path.join(temp_dir, "results.json")

         result_img = img
         if os.path.exists(result_img_path):
             # copy() loads the pixel data into memory so the file can be
             # deleted together with the temp directory below.
             with Image.open(result_img_path) as rendered:
                 result_img = rendered.copy()

         text_output = "No text detected"
         if os.path.exists(result_text_path):
             with open(result_text_path, "r", encoding="utf-8") as file:
                 result_text = json.load(file)
             text_output = "\n".join(str(page) for page in result_text.values())

         return result_img, text_output
     finally:
         # Remove the input image and every result file on all exit paths.
         shutil.rmtree(temp_dir, ignore_errors=True)
 def text_line_detection_function_cli(img):
     """Run the ``surya_detect`` command-line tool on ``img`` and collect its results.

     Args:
         img: Image object exposing ``save(path)``, or ``None`` when no image
             was supplied.

     Returns:
         A ``(image, result)`` tuple:
         - ``(None, {"error": "No image provided"})`` when ``img`` is ``None``;
         - ``(img, {"error": "Detection failed"})`` when the command cannot
           be started or exits with a non-zero status;
         - otherwise the annotated image (or ``img`` if none was produced)
           and the parsed JSON results (or an error dict if none were found).
     """
     import shutil

     if img is None:
         # Presumably what the UI passes when Run is clicked without an
         # upload; answer with a message instead of crashing in img.save().
         return None, {"error": "No image provided"}

     img_path, temp_dir = save_temp_image(img)
     try:
         # An argument list without a shell avoids shell interpretation of
         # the interpolated paths.
         command = ["surya_detect", img_path, "--images", "--results_dir", temp_dir]
         try:
             subprocess.run(command, check=True)
         except (subprocess.CalledProcessError, OSError) as e:
             # Output is not captured (it streams to the console), so
             # e.output would be None; log the exception itself, which
             # carries the command and exit status. OSError covers a
             # missing executable now that no shell is involved.
             print(f"Text line detection command failed: {e}")
             return img, {"error": "Detection failed"}

         # NOTE(review): these file names assume the tool writes directly
         # into results_dir - confirm against the installed surya version.
         result_img_path = os.path.join(temp_dir, "image_with_lines.png")
         result_json_path = os.path.join(temp_dir, "results.json")

         result_img = img
         if os.path.exists(result_img_path):
             # copy() loads the pixel data into memory so the file can be
             # deleted together with the temp directory below.
             with Image.open(result_img_path) as rendered:
                 result_img = rendered.copy()

         result_json = {"error": "No detection results found"}
         if os.path.exists(result_json_path):
             with open(result_json_path, "r", encoding="utf-8") as file:
                 result_json = json.load(file)

         return result_img, result_json
     finally:
         # Remove the input image and every result file on all exit paths.
         shutil.rmtree(temp_dir, ignore_errors=True)
 # Language names offered by the dropdown on the OCR tab, in display order.
 _OCR_LANGUAGE_CHOICES = [
     "Afrikaans", "Amharic", "Arabic", "Assamese", "Azerbaijani",
     "Belarusian", "Bulgarian", "Bengali", "Breton", "Bosnian",
     "Catalan", "Czech", "Welsh", "Danish", "German",
     "Greek", "English", "Esperanto", "Spanish", "Estonian",
     "Basque", "Persian", "Finnish", "French", "Western Frisian",
     "Irish", "Scottish Gaelic", "Galician", "Gujarati", "Hausa",
     "Hebrew", "Hindi", "Croatian", "Hungarian", "Armenian",
     "Indonesian", "Icelandic", "Italian", "Japanese", "Javanese",
     "Georgian", "Kazakh", "Khmer", "Kannada", "Korean",
     "Kurdish", "Kyrgyz", "Latin", "Lao", "Lithuanian",
     "Latvian", "Malagasy", "Macedonian", "Malayalam", "Mongolian",
     "Marathi", "Malay", "Burmese", "Nepali", "Dutch",
     "Norwegian", "Oromo", "Oriya", "Punjabi", "Polish",
     "Pashto", "Portuguese", "Romanian", "Russian", "Sanskrit",
     "Sindhi", "Sinhala", "Slovak", "Slovenian", "Somali",
     "Albanian", "Serbian", "Sundanese", "Swedish", "Swahili",
     "Tamil", "Telugu", "Thai", "Tagalog", "Turkish",
     "Uyghur", "Ukrainian", "Urdu", "Uzbek", "Vietnamese",
     "Xhosa", "Yiddish", "Chinese",
 ]

 # Two-tab UI: one tab runs OCR, the other runs text line detection.
 with gr.Blocks() as app:
     gr.Markdown("# Surya OCR and Text Line Detection via CLI")

     with gr.Tab("OCR"):
         # Inputs: the image, the language, and the trigger button.
         with gr.Column():
             ocr_input_image = gr.Image(label="Input Image for OCR", type="pil")
             ocr_language_selector = gr.Dropdown(
                 label="Select Language for OCR",
                 choices=_OCR_LANGUAGE_CHOICES,
                 value="English",
             )
             ocr_run_button = gr.Button("Run OCR")
         # Outputs: the annotated image and the recognized text.
         with gr.Column():
             ocr_output_image = gr.Image(
                 label="OCR Output Image",
                 type="pil",
                 interactive=False,
             )
             ocr_text_output = gr.TextArea(label="Recognized Text")
         ocr_run_button.click(
             fn=ocr_function_cli,
             inputs=[ocr_input_image, ocr_language_selector],
             outputs=[ocr_output_image, ocr_text_output],
         )

     with gr.Tab("Text Line Detection"):
         # Inputs: the image and the trigger button.
         with gr.Column():
             detection_input_image = gr.Image(label="Input Image for Detection", type="pil")
             detection_run_button = gr.Button("Run Text Line Detection")
         # Outputs: the annotated image and the raw JSON results.
         with gr.Column():
             detection_output_image = gr.Image(
                 label="Detection Output Image",
                 type="pil",
                 interactive=False,
             )
             detection_json_output = gr.JSON(label="Detection JSON Output")
         detection_run_button.click(
             fn=text_line_detection_function_cli,
             inputs=detection_input_image,
             outputs=[detection_output_image, detection_json_output],
         )

 # Start the web server only when this file is executed as a script.
 if __name__ == "__main__":
     app.launch()