Spaces:

GABRIELSZK
/

EXAMES

Sleeping

App Files Files Community

GABRIELSZK committed on Apr 25

Commit

28c22fc

verified ·

1 Parent(s): 9de950a

Upload 3 files

Browse files

Files changed (3) hide show

README.md +26 -0
app.py +124 -0
requirements.txt +5 -0

README.md ADDED Viewed

	@@ -0,0 +1,26 @@

+---
+title: Extrator de Exames PDF
+emoji: 🧪
+colorFrom: blue
+colorTo: indigo
+sdk: gradio
+sdk_version: "5.26.0"
+app_file: app.py
+pinned: false
+---
+# 🧪 Extrator de Exames Laboratoriais em PDF
+Este Space realiza a extração de exames laboratoriais de PDFs clínicos.
+Funciona mesmo com PDFs escaneados: além do texto extraído pelo PyMuPDF, todas as páginas também passam por OCR via Tesseract.
+### Funcionalidades
+- ✅ Extração de texto com PyMuPDF (`fitz`)
+- 🧠 Fallback OCR com `pytesseract`
+- 📊 Classificação automática de valores alterados (`↑` / `↓`)
+- 📥 Exportação em CSV
+---
+Desenvolvido para uso clínico rápido e confiável em ambientes hospitalares.
+Hospedado com ❤️ via Hugging Face Spaces.

app.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import fitz
+import re
+import gradio as gr
+import pandas as pd
+import tempfile
+import pytesseract
+from PIL import Image, ImageEnhance, ImageFilter
+import io
# Reference interval (lower bound, upper bound) for each exam abbreviation.
# Built once at import time instead of on every classificar() call.
FAIXAS_REFERENCIA = {
    "HB": (12, 17), "HT": (36, 50), "GLI": (70, 99), "UREIA": (10, 50),
    "CR": (0.6, 1.3), "K+": (3.5, 5.5), "NA+": (135, 145), "TGO": (0, 40),
    "TGP": (0, 40), "ALB": (3.5, 5.0), "INR": (0.8, 1.2), "TAP": (10, 14),
    "TTP": (25, 35), "LAC": (0.5, 2.2), "PLT": (150, 450), "LEUCO": (4, 11),
    "CKMB": (0, 24), "CPK": (0, 190), "TROPO": (0, 0.04), "AMIL": (28, 100),
    "LIP": (0, 60), "PCR": (0, 1), "ÁC UR": (3.5, 7.2), "FAL": (44, 147),
    "GGT": (0, 38), "FÓS": (2.5, 4.5), "MG++": (1.6, 2.6), "CA TOTAL": (8.6, 10.2),
    "CAI": (1.1, 1.35), "BT": (0.2, 1.2), "BD": (0, 0.4), "BI": (0.1, 0.8), "CL-": (96, 106),
}


def classificar(nome, valor):
    """Flag *valor* with an arrow when it is outside the reference range of *nome*.

    Args:
        nome: Exam abbreviation used as key of ``FAIXAS_REFERENCIA``.
        valor: Extracted value as text (e.g. ``"13.5"``, ``"<0.01"``).

    Returns:
        ``f"{valor} ↓"`` when below the lower bound, ``f"{valor} ↑"`` when
        above the upper bound, otherwise *valor* unchanged. *valor* is also
        returned unchanged when it cannot be parsed as a number (for example
        the ``"—"`` placeholder) or when *nome* has no reference range.
    """
    try:
        # "K", ">" and "<" are dropped before parsing; the original text is
        # still what gets returned.
        # NOTE(review): "K" is presumably a thousands suffix - confirm.
        numero = float(valor.replace("K", "").replace(">", "").replace("<", "").strip())
    except (AttributeError, ValueError):
        # AttributeError: valor is not a string (e.g. None).
        # ValueError: the text is not numeric.
        return valor

    faixa = FAIXAS_REFERENCIA.get(nome)
    if faixa is None:
        return valor

    minimo, maximo = faixa
    if numero < minimo:
        return f"{valor} ↓"
    if numero > maximo:
        return f"{valor} ↑"
    return valor
def melhorar_imagem(img):
    """Return a pre-processed copy of *img* for OCR.

    The image is converted to mode ``'L'``, its contrast is enhanced by a
    factor of 2 and the SHARPEN filter is applied, in that order.
    """
    cinza = img.convert('L')
    contrastada = ImageEnhance.Contrast(cinza).enhance(2)
    return contrastada.filter(ImageFilter.SHARPEN)
def limpar_texto(texto):
    """Normalise extracted text before the exam patterns are applied.

    Two passes are made:

    1. Whitespace between stand-alone capital letters is removed so that
       spaced-out abbreviations are joined (``"C P K"`` -> ``"CPK"``).
    2. Every run of whitespace (including newlines) becomes a single space.

    Args:
        texto: Raw text from the PDF text layer or from OCR.

    Returns:
        The cleaned text.
    """
    # Lookarounds keep the letters out of the match. A consuming pattern such
    # as r'\b([A-Z])\s+([A-Z])\b' only joins non-overlapping pairs and would
    # turn "C P K" into "CP K".
    texto = re.sub(r'(?<=\b[A-Z])\s+(?=[A-Z]\b)', '', texto)
    return re.sub(r'\s+', ' ', texto)
def extrair_texto_pdf(pdf_file):
    """Extract the text of a PDF from its text layer and via OCR.

    Every page is read twice: once with PyMuPDF's ``page.get_text()`` and once
    by rendering it at 400 dpi, pre-processing the image with
    ``melhorar_imagem`` and running it through ``pytesseract``.

    Args:
        pdf_file: Either a path string or an object whose ``name`` attribute
            holds the path of the PDF.

    Returns:
        A ``(texto_fitz, texto_ocr)`` tuple, both cleaned with
        ``limpar_texto``.
    """
    # Accept both an uploaded-file object (with .name) and a plain path.
    caminho = getattr(pdf_file, "name", pdf_file)

    partes_fitz = []
    partes_ocr = []
    with fitz.open(caminho) as doc:
        for page in doc:
            partes_fitz.append(page.get_text())
            pix = page.get_pixmap(dpi=400)
            # OCR each page right away so only one page raster is held in
            # memory at a time.
            with Image.open(io.BytesIO(pix.tobytes("png"))) as img:
                partes_ocr.append(pytesseract.image_to_string(melhorar_imagem(img)))

    texto_fitz = limpar_texto("".join(partes_fitz))
    texto_ocr = limpar_texto(" ".join(partes_ocr))
    return texto_fitz, texto_ocr
def buscar_exame(textos, padrao):
    """Return the first numeric value captured by *padrao* in *textos*.

    Args:
        textos: Iterable of text sources, searched in order.
        padrao: Regular expression whose group 1 captures the value. It is
            applied case-insensitively.

    Returns:
        The captured value with trailing ``.``/``,`` removed and decimal
        commas replaced by dots, or ``None`` when no source yields a value
        containing at least one digit.
    """
    regex = re.compile(padrao, re.IGNORECASE)
    for texto in textos:
        if not texto:
            continue
        for match in regex.finditer(texto):
            bruto = match.group(1)
            # A pattern with alternation can match through a branch that has
            # no capture group; skip it instead of failing on None.
            if bruto is None:
                continue
            valor = bruto.strip().rstrip(".,").replace(",", ".")
            # Ignore captures made only of punctuation (e.g. "." after a name).
            if any(ch.isdigit() for ch in valor):
                return valor
    return None
# Exam abbreviation -> regular expression whose group 1 captures the value.
# Patterns are applied case-insensitively by buscar_exame. Where an exam has a
# long name and an abbreviation, both alternatives sit inside one
# non-capturing group so the value group takes part in every match, and word
# boundaries stop the abbreviation from matching inside other words.
PADROES_EXAMES = {
    "AMIL": r"amilase[^\d]{0,10}([\d.,]+)",
    "ÁC UR": r"ácido[\s]?úrico[^\d]{0,10}([\d.,]+)",
    "BT": r"\b(?:bilirrubina\s+total|bt)\b[^\d]{0,10}([\d.,]+)",
    "BD": r"\b(?:bilirrubina\s+direta|bd)\b[^\d]{0,10}([\d.,]+)",
    "BI": r"\b(?:bilirrubina\s+indireta|bi)\b[^\d]{0,10}([\d.,]+)",
    "CAI": r"\b(?:c[áa]lcio\s+ioniz[áa]vel|cai)\b[^\d]{0,10}([\d.,]+)",
    "CA TOTAL": r"cálcio total[^\d]{0,10}([\d.,]+)",
    "CL-": r"cloro[^\d]{0,10}([\d.,]+)",
    "CR": r"creatinina[^\d]{0,10}([\d.,]+)",
    "FAL": r"\b(?:fosfatase\s+alcalina|fal)\b[^\d]{0,10}([\d.,]+)",
    "FÓS": r"f[óo]sforo[^\d]{0,10}([\d.,]+)",
    # The gap between "gama" and "gt" is bounded so the match cannot stretch
    # across unrelated parts of the whitespace-collapsed text.
    "GGT": r"\b(?:gama.{0,20}?gt|ggt)\b[^\d]{0,10}([\d.,]+)",
    "GLI": r"glicose[^\d]{0,10}([\d.,]+)",
    "LIP": r"lipase[^\d]{0,10}([\d.,]+)",
    "MG++": r"magn[ée]sio[^\d]{0,10}([\d.,]+)",
    "PCR": r"pcr[^\d]{0,10}([\d.,]+)",
    "K+": r"pot[áa]ssio[^\d]{0,10}([\d.,]+)",
    "PTN": r"proteínas totais[^\d]{0,10}([\d.,]+)",
    "ALB": r"albumina[^\d]{0,10}([\d.,]+)",
    "GLOB": r"globulina[^\d]{0,10}([\d.,]+)",
    "RELAÇÃO": r"relação.*?a/g[^\d]{0,10}([\d.,]+)",
    "NA+": r"s[óo]dio[^\d]{0,10}([\d.,]+)",
    "TGO": r"tgo[^\d]{0,10}([\d.,]+)",
    "TGP": r"tgp[^\d]{0,10}([\d.,]+)",
    "TAP": r"tap[^\d]{0,10}([\d.,]+)",
    "INR": r"inr[^\d]{0,10}([\d.,]+)",
    "TTP": r"ttpa[^\d]{0,10}([\d.,]+)",
    "UREIA": r"ureia[^\d]{0,10}([\d.,]+)",
    "LAC": r"lactato[^\d]{0,10}([\d.,]+)",
    "LEUCO": r"leuc[óo]citos[^\d]{0,10}([\d.,]+)",
    "HB": r"hemoglobina[^\d]{0,10}([\d.,]+)",
    "HT": r"hemat[óo]crito[^\d]{0,10}([\d.,]+)",
    "PLT": r"plaquetas[^\d]{0,10}([\d.,]+)",
    "CPK": r"(?:cpk|creatinofosfoquinase)[^\d]{0,10}([\d.,]+)",
    "CKMB": r"(?:ck[- ]?mb|ckmb massa)[^\d]{0,10}([\d.,]+)",
    "TROPO": r"(?:troponina)[^\d]{0,10}([<>]?[\d.,]+)",
}


def extrair_exames_formatado(pdf_file):
    """Extract lab results from a PDF and return them as text plus a CSV file.

    Both the PDF text layer and the OCR text are searched for every exam in
    ``PADROES_EXAMES``; the text layer is tried first. Each value found is
    passed through ``classificar``; exams that are not found are reported
    as ``"—"``.

    Args:
        pdf_file: Uploaded PDF as accepted by ``extrair_texto_pdf``; a falsy
            value means nothing was uploaded.

    Returns:
        A ``(texto, caminho_csv)`` tuple: one ``"EXAME: valor"`` line per exam
        and the path of a temporary CSV file with columns ``Exame`` and
        ``Valor``. When no file was supplied, returns
        ``("Nenhum arquivo enviado.", None)``.
    """
    if not pdf_file:
        return "Nenhum arquivo enviado.", None

    texto_fitz, texto_ocr = extrair_texto_pdf(pdf_file)
    textos = [texto_fitz, texto_ocr]  # Always consider both sources.

    resultados = [
        (exame, classificar(exame, buscar_exame(textos, padrao) or "—"))
        for exame, padrao in PADROES_EXAMES.items()
    ]
    df = pd.DataFrame(resultados, columns=["Exame", "Valor"])

    # delete=False keeps the file on disk after the handle is closed, so the
    # returned path stays valid. Writing through the handle inside the
    # context manager makes sure the handle is closed.
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".csv", newline="", encoding="utf-8"
    ) as temp_file:
        df.to_csv(temp_file, index=False)

    texto_final = "\n".join(f"{e}: {v}" for e, v in resultados)
    return texto_final, temp_file.name
# Gradio UI: a PDF upload, a button that runs the extraction, a text box with
# the formatted results and a file component for downloading the CSV.
with gr.Blocks() as demo:
    pdf_file = gr.File(label="📄 PDF Exames")
    extract_button = gr.Button("🔍 Extrair")
    output_text = gr.Textbox(label="📋 Resultados", lines=25)
    download_button = gr.File(label="📥 CSV")
    extract_button.click(
        fn=extrair_exames_formatado,
        inputs=pdf_file,
        outputs=[output_text, download_button],
    )

# Only start the server when run as a script, so importing this module
# (tests, reload tooling) does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio>=5.26.0
+pymupdf
+pytesseract
+pillow
+pandas