Spaces:

GABRIELSZK
/

EXAMES

Sleeping

App Files Files Community

GABRIELSZK committed on Apr 25

Commit

a0cac80

verified ·

1 Parent(s): 8e5d51b

Upload 3 files

Browse files

Files changed (1) hide show

app.py +104 -1

app.py CHANGED Viewed

	@@ -1 +1,104 @@
1	- ~~<CÓDIGO FINAL COM OCR INSERIDO AQUI (reduzido para visualização)>~~

+import fitz
+import re
+import gradio as gr
+import pandas as pd
+import tempfile
+import pytesseract
+from PIL import Image
+import io
# Reference interval for each exam label, stored as (lower bound, upper bound).
# classificar() marks a value strictly below the lower bound with "↓" and
# strictly above the upper bound with "↑"; values equal to a bound are unmarked.
faixas = {
    "HB": (12, 17),
    "HT": (36, 50),
    "GLI": (70, 99),
    "UREIA": (10, 50),
    "CR": (0.6, 1.3),
    "K+": (3.5, 5.5),
    "NA+": (135, 145),
    "TGO": (0, 40),
    "TGP": (0, 40),
    "ALB": (3.5, 5.0),
    "INR": (0.8, 1.2),
    "TAP": (10, 14),
    "TTP": (25, 35),
    "LAC": (0.5, 2.2),
    "PLT": (150000, 450000),
    "LEUCO": (4000, 11000),
}
def classificar(nome, valor, faixas_ref=None):
    """Append a trend arrow to *valor* when it lies outside its reference range.

    Args:
        nome: Exam label used as the key into the range table (e.g. "HB").
        valor: Raw value as text. A leading "<" or ">" and surrounding
            whitespace are ignored for the comparison, and a trailing "K"/"k"
            is read as a x1000 multiplier.
        faixas_ref: Optional mapping ``label -> (min, max)``. When omitted the
            module-level ``faixas`` table is used.

    Returns:
        ``f"{valor} ↓"`` when the number is below the minimum,
        ``f"{valor} ↑"`` when it is above the maximum, and *valor* unchanged
        when it is within range, when *nome* has no reference range, or when
        *valor* cannot be parsed as a number.
    """
    limites = faixas if faixas_ref is None else faixas_ref
    if nome not in limites:
        return valor

    try:
        bruto = valor.replace(">", "").replace("<", "").strip()
        if bruto.upper().endswith("K"):
            # Scale numerically: textually replacing "K" with "000" would turn
            # "1.5K" into 1.5000 (== 1.5) instead of 1500.
            numero = float(bruto[:-1]) * 1000
        else:
            numero = float(bruto)
    except (AttributeError, ValueError):
        # Non-string input or non-numeric text: best effort, return it as-is.
        return valor

    min_v, max_v = limites[nome]
    if numero < min_v:
        return f"{valor} ↓"
    if numero > max_v:
        return f"{valor} ↑"
    return valor
def extrair_texto_pdf(pdf_file):
    """Extract the text of a PDF twice: from its text layer and through OCR.

    Args:
        pdf_file: Either a filesystem path (``str`` or path-like) or an
            object exposing the path in its ``name`` attribute, such as an
            uploaded temporary file.

    Returns:
        A ``(texto_fitz, texto_ocr)`` tuple. ``texto_fitz`` is the
        concatenated text layer of every page; ``texto_ocr`` is the
        pytesseract output of every page rendered at 300 dpi, joined with a
        single space. Line breaks in both strings are replaced by spaces so
        that patterns can match across lines.
    """
    # Accept a plain path as well as a file-like upload. Path-like objects are
    # checked first because pathlib's ``.name`` is only the final component.
    if isinstance(pdf_file, str) or hasattr(pdf_file, "__fspath__"):
        caminho = pdf_file
    else:
        caminho = pdf_file.name

    partes_texto = []
    partes_ocr = []
    with fitz.open(caminho) as doc:
        for page in doc:
            partes_texto.append(page.get_text())
            pix = page.get_pixmap(dpi=300)
            # OCR each page as soon as it is rendered so that only one
            # full-resolution image is alive at a time.
            with Image.open(io.BytesIO(pix.tobytes("png"))) as img:
                partes_ocr.append(pytesseract.image_to_string(img))

    texto_fitz = "".join(partes_texto).replace("\n", " ").replace("\r", " ")
    texto_ocr = " ".join(partes_ocr).replace("\n", " ").replace("\r", " ")
    return texto_fitz, texto_ocr
def buscar_exame(texto_primario, texto_ocr, padrao, max_digitos=5):
    """Return the first plausible value captured by *padrao*.

    *texto_primario* is searched first; *texto_ocr* is only consulted when
    the primary text yields no acceptable candidate.

    Args:
        texto_primario: Preferred text to search. ``None`` or an empty string
            is skipped.
        texto_ocr: Fallback text to search. ``None`` or an empty string is
            skipped.
        padrao: Regular expression (matched case-insensitively) with exactly
            one capturing group around the value.
        max_digitos: Maximum length of a candidate once ".", "<" and ">" are
            removed; longer captures are rejected.

    Returns:
        The value with decimal commas converted to dots and trailing dots
        removed, or ``None`` when nothing acceptable is found.
    """
    regex = re.compile(padrao, re.IGNORECASE)
    for texto in (texto_primario, texto_ocr):
        if not texto:
            continue
        for bruto in regex.findall(texto):
            # Normalise the decimal comma and drop sentence punctuation that
            # a greedy "[\d.,]+" group picks up (e.g. "13,5." -> "13.5").
            val = bruto.strip().replace(",", ".").rstrip(".")
            nucleo = val.replace(".", "").replace(">", "").replace("<", "")
            # A capture made only of separators (e.g. ".") is not a value.
            if not any(c.isdigit() for c in nucleo):
                continue
            if len(nucleo) <= max_digitos:
                return val
    return None
def extrair_exames_formatado(pdf_file):
    """Extract the configured lab values from a PDF and export them as CSV.

    Args:
        pdf_file: The uploaded PDF as handed over by the UI, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(texto_final, caminho_csv)`` tuple. ``texto_final`` holds one
        ``"LABEL: value"`` line per configured exam, with "—" for exams that
        were not found. ``caminho_csv`` is the path of a temporary CSV file
        with the columns "Exame" and "Valor". When *pdf_file* is ``None`` the
        result is ``("Nenhum arquivo enviado.", None)``.
    """
    if pdf_file is None:
        return "Nenhum arquivo enviado.", None

    texto_fitz, texto_ocr = extrair_texto_pdf(pdf_file)

    # Label -> pattern whose single capturing group is the exam value.
    campos = {
        "HB": r"hemoglobina[^:\d]{0,10}[:=]?\s*([\d.,]+)",
        "HT": r"hemat[óo]crito[^:\d]{0,10}[:=]?\s*([\d.,]+)",
        "PLT": r"plaquetas[^:\d]{0,10}[:=]?\s*([\d.,]+)",
        "INR": r"INR[^:\d]{0,10}[:=]?\s*([\d.,]+)",
        "TROPO": r"troponina.*?[:=]?\s*([<>]?\s*[\d.,]+)"
        # Additional exam patterns can be added here.
    }

    resultados = []
    for rotulo, padrao in campos.items():
        val = buscar_exame(texto_fitz, texto_ocr, padrao)
        resultados.append((rotulo, classificar(rotulo, val) if val else "—"))

    df = pd.DataFrame(resultados, columns=["Exame", "Valor"])

    # Reserve a unique path, then close the handle before pandas writes to it:
    # this avoids leaking the descriptor and lets the file be reopened by name
    # on platforms that forbid a second open while the first is active.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
        caminho_csv = temp_file.name
    df.to_csv(caminho_csv, index=False)

    texto_final = "\n".join(f"{e}: {v}" for e, v in resultados)
    return texto_final, caminho_csv
# Gradio interface: a PDF upload, a trigger button, a text box for the
# classified results and a file slot for the generated CSV. Components appear
# on the page in the order they are created here.
with gr.Blocks() as demo:
    gr.Markdown("## 🧪 Extrator de Exames com OCR")
    pdf_file = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
    extract_button = gr.Button("🔍 Extrair Exames")
    output_text = gr.Textbox(label="📋 Resultado Classificado", lines=25)
    download_button = gr.File(label="📥 Baixar CSV")
    # The two return values of extrair_exames_formatado map, in order, onto
    # the text box and the download slot.
    extract_button.click(
        fn=extrair_exames_formatado,
        inputs=pdf_file,
        outputs=[output_text, download_button],
    )

# Launch only when run as a script, so importing this module (for tests or
# reuse of the helpers) does not start a web server.
if __name__ == "__main__":
    demo.launch()