Spaces:

GABRIELSZK
/

EXAMES

Sleeping

App Files Community

GABRIELSZK committed on Apr 25

Commit

8f07449

verified ·

1 Parent(s): 3a8a27e

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -6

app.py CHANGED Viewed

@@ -16,6 +16,9 @@ faixas = {
 }
 def classificar(nome, valor):
     try:
         raw = valor.replace("K", "").replace(">", "").replace("<", "").strip()
         val = float(raw)
@@ -36,7 +39,7 @@ def melhorar_imagem(img):
     img = img.filter(ImageFilter.SHARPEN)
     return img
-# Extrai texto nativo + OCR
 def extrair_texto_pdf(pdf_file):
     texto_fitz = []
     ocr_imgs = []
@@ -53,7 +56,7 @@ def extrair_texto_pdf(pdf_file):
     texto_ocr = re.sub(r'\s+', ' ', texto_ocr)
     return texto_fitz, texto_ocr
-# Padrões regex corrigidos para LINF e INR
 exames = {
     "LEUCO":   r"leuc[óo]citos.*?([\d.,]+)\s?(?:10\^3)?/u?l",
     "B":       r"bas[óo]filos.*?([\d.,]+)\s?%",
@@ -94,7 +97,7 @@ exames = {
     "TTP":     r"ttpa.*?([\d.,]+)\s?seg",
     "LAC":     r"lactato.*?([\d.,]+)\s?mmol/l",
     "CKMB":    r"ck[- ]?mb.*?([\d.,]+)\s?u/l",
-    "CPK":    r"cpk.*?\bresultado\b\s*([\d.,]+)",,
     "TROPO":   r"troponina.*?([<>]?[\d.,]+)\s?ng/ml"
 }
@@ -108,7 +111,7 @@ ordem = [
     "TGO","TGP","TAP","INR","TTP","LAC","CKMB","CPK","TROPO"
 ]
-# Extrai e formata
 def extrair_exames_formatado(pdf_file):
     if not pdf_file:
         return "Nenhum arquivo enviado.", None
@@ -132,7 +135,7 @@ def extrair_exames_formatado(pdf_file):
 # Interface Gradio
 with gr.Blocks() as demo:
-    gr.Markdown("## 🧪 Extrator Avançado com OCR - Regex Final")
     pdf_file = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
     btn = gr.Button("🔍 Extrair Exames")
     out_txt = gr.Textbox(label="📋 Exames Classificados", lines=10)
@@ -140,4 +143,4 @@ with gr.Blocks() as demo:
     btn.click(extrair_exames_formatado, inputs=pdf_file, outputs=[out_txt, dl])
 if __name__ == '__main__':
-    demo.launch()

 }
 def classificar(nome, valor):
+    """
+    Recebe o nome do exame e valor em string; retorna valor com ↓ ou ↑ se fora da faixa.
+    """
     try:
         raw = valor.replace("K", "").replace(">", "").replace("<", "").strip()
         val = float(raw)
     img = img.filter(ImageFilter.SHARPEN)
     return img
+# Extrai texto nativo + OCR das páginas do PDF
 def extrair_texto_pdf(pdf_file):
     texto_fitz = []
     ocr_imgs = []
     texto_ocr = re.sub(r'\s+', ' ', texto_ocr)
     return texto_fitz, texto_ocr
+# Padrões regex para extração de cada exame
 exames = {
     "LEUCO":   r"leuc[óo]citos.*?([\d.,]+)\s?(?:10\^3)?/u?l",
     "B":       r"bas[óo]filos.*?([\d.,]+)\s?%",
     "TTP":     r"ttpa.*?([\d.,]+)\s?seg",
     "LAC":     r"lactato.*?([\d.,]+)\s?mmol/l",
     "CKMB":    r"ck[- ]?mb.*?([\d.,]+)\s?u/l",
+    "CPK":     r"cpk.*?\bresultado\b\s*([\d.,]+)",
     "TROPO":   r"troponina.*?([<>]?[\d.,]+)\s?ng/ml"
 }
     "TGO","TGP","TAP","INR","TTP","LAC","CKMB","CPK","TROPO"
 ]
+# Função principal de extração e formatação
 def extrair_exames_formatado(pdf_file):
     if not pdf_file:
         return "Nenhum arquivo enviado.", None
 # Interface Gradio
 with gr.Blocks() as demo:
+    gr.Markdown("## 🧪 Extrator Avançado com OCR - Versão Corrigida")
     pdf_file = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
     btn = gr.Button("🔍 Extrair Exames")
     out_txt = gr.Textbox(label="📋 Exames Classificados", lines=10)
     btn.click(extrair_exames_formatado, inputs=pdf_file, outputs=[out_txt, dl])
 if __name__ == '__main__':
+    demo.launch()