Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- README.md +20 -0
- app.py +109 -0
- requirements.txt +2 -0
README.md
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Extrator de Exames Laboratoriais
|
3 |
+
emoji: 🧪
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: green
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: "4.21.0"
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
+
# Extrator de Exames Laboratoriais (PDF → Texto Compacto)
|
13 |
+
|
14 |
+
Este app Gradio permite carregar um PDF com resultados laboratoriais e extrai automaticamente os dados mais relevantes (HB, HT, Leuco, PCR, CKMB, etc.) no formato compacto.
|
15 |
+
|
16 |
+
**Exemplo de saída:**
|
17 |
+
|
18 |
+
`HB: 14.5 / HT: 41.5 / LEUCO: 9.1K + 1% B + 66% SS / PLT: 215K / ...`
|
19 |
+
|
20 |
+
A ferramenta é tolerante a diferentes formatações, títulos e layouts de PDFs clínicos.
|
app.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import fitz
|
2 |
+
import re
|
3 |
+
import gradio as gr
|
4 |
+
|
5 |
+
# Labels and search patterns for the single-value analytes, in output order.
# Group 1 of every pattern captures the raw numeric token found after the
# analyte name.
_CAMPOS = (
    ("LAC", r"lactato[\s\n]+([\d,\.]+)"),
    ("AMIL", r"amilase[\s\n]+([\d,\.]+)"),
    ("AC UR", r"[áa]cido[ \n]+[úu]rico[\s\n]+([\d,\.]+)"),
    ("BT", r"bilirrubina total[\s\n]+([\d,\.]+)"),
    ("BD", r"bilirrubina direta[\s\n]+([\d,\.]+)"),
    ("BI", r"bilirrubina indireta[\s\n]+([\d,\.]+)"),
    ("CAI", r"ioniz[áa]vel[\s\n]+([\d,\.]+)"),
    ("CL-", r"cl[óo]ro[\s\n]+([\d,\.]+)"),
    ("CR", r"creatinina[\s\n]+([\d,\.]+)"),
    ("FAL", r"fosfatase alcalina[\s\n]+([\d,\.]+)"),
    ("P", r"f[óo]sforo[\s\n]+([\d,\.]+)"),
    ("GGT", r"gama.?gt[\s\n]+([\d,\.]+)"),
    ("GLI", r"glicose[\s\n]+([\d,\.]+)"),
    ("HB", r"hemoglobina[\s\n]+([\d,\.]+)"),
    ("HT", r"hemat[óo]crito[\s\n]+([\d,\.]+)"),
    ("LEUCO", r"leuc[óo]citos[\s\n]+([\d,\.]+)"),
    ("B", r"bastonetes[\s\n]+([\d,\.]+)"),
    ("SS", r"segmentados[\s\n]+([\d,\.]+)"),
    ("PLT", r"plaquetas[\s\n]+([\d,\.]+)"),
    ("LIP", r"lipase[\s\n]+([\d,\.]+)"),
    ("MG++", r"magn[ée]sio[\s\n]+([\d,\.]+)"),
    ("PCR", r"PCR[\s\n]+([\d,\.]+)"),
    ("K+", r"pot[áa]ssio[\s\n]+([\d,\.]+)"),
    ("PTN", r"prote[ií]na[s]? totais?[\s\n]+([\d,\.]+)"),
    ("ALB", r"albumina[\s\n]+([\d,\.]+)"),
    ("NA+", r"s[óo]dio[\s\n]+([\d,\.]+)"),
    ("TGO", r"TGO[\s\n]+([\d,\.]+)"),
    ("TGP", r"TGP[\s\n]+([\d,\.]+)"),
    ("TAP", r"TAP.*?([\d,\.]+)"),
    ("INR", r"INR.*?([\d,\.]+)"),
    ("TTP", r"TTP.*?([\d,\.]+)"),
    ("RELAÇÃO", r"relaç[aã]o.*?([\d,\.]+)"),
    ("UR", r"ureia[\s\n]+([\d,\.]+)"),
    ("CPK", r"CPK.*?([\d,\.]+)"),
    ("CKMB", r"CKMB.*?([\d,\.]+)"),
    ("TROPO", r"tropo.*?([<>=]?[\d,\.]+)"),
)

# Compiled once at import time; every lookup is case-insensitive.
_RE_CAMPOS = tuple(
    (rotulo, re.compile(padrao, re.IGNORECASE)) for rotulo, padrao in _CAMPOS
)

# Urinalysis (EAS) findings. The two cell counts are reported as "low-high"
# ranges; the bacteria finding is a single word.
_RE_EAS_LEUC = re.compile(r"leuc[óo]citos[\s\n]*(\d+-\d+)", re.IGNORECASE)
_RE_EAS_HEM = re.compile(r"hem[áa]cias[\s\n]*(\d+-\d+)", re.IGNORECASE)
_RE_EAS_BACT = re.compile(r"bact[ée]rias[\s\n]*(\w+)", re.IGNORECASE)

# Differential-count labels that are only reported merged into the LEUCO entry.
_DIFERENCIAL = ("B", "SS")

_RE_DIGITO = re.compile(r"\d")
_RE_FAIXA = re.compile(r"-\d")
# A dot-grouped integer such as "9.100" or "12.345.678".
_RE_MILHAR = re.compile(r"\d{1,3}(?:\.\d{3})+")


def _capturar(regex, texto):
    """Return the first usable group-1 capture of *regex* in *texto*, or None.

    A capture is skipped when it contains no digit (for example a run of dot
    leaders) or when it is immediately followed by "-<digit>", which marks
    the first half of a range such as the urinalysis count "3-5" rather than
    a measured value.
    """
    for achado in regex.finditer(texto):
        bruto = achado.group(1).strip()
        if not _RE_DIGITO.search(bruto):
            continue
        if _RE_FAIXA.match(texto, achado.end(1)):
            continue
        return bruto
    return None


def _normalizar(bruto):
    """Rewrite a captured number so that its decimal mark is a dot.

    When both "." and "," occur, the separator that appears last is taken as
    the decimal mark and the other one is dropped as a grouping separator
    ("1.234,5" becomes "1234.5"). Otherwise every comma becomes a dot and
    existing dots are left untouched.
    """
    if "." in bruto and "," in bruto:
        if bruto.rfind(",") > bruto.rfind("."):
            return bruto.replace(".", "").replace(",", ".")
        return bruto.replace(",", "")
    return bruto.replace(",", ".")


def _k_format(bruto):
    """Format a raw leukocyte count, abbreviating thousands as "K".

    The raw token is parsed exactly once. A comma-free token made of
    dot-separated three-digit groups ("9.100") is read as an integer with
    thousands separators; anything else goes through _normalizar. Values of
    1000 or more are shown in thousands with one decimal ("9.1K"); smaller
    values are shown as the parsed float. A token that cannot be parsed is
    returned in its normalized text form.
    """
    if "," not in bruto and _RE_MILHAR.fullmatch(bruto):
        valor = bruto.replace(".", "")
    else:
        valor = _normalizar(bruto)
    try:
        numero = float(valor)
    except ValueError:
        return valor
    if numero >= 1000:
        return f"{round(numero / 1000, 1)}K"
    return str(numero)


def formatar_exames(texto: str) -> str:
    """Build the compact one-line lab summary from already-extracted text.

    Output entries are joined with " / " in this order: the urinalysis (EAS)
    summary when all three of its findings are present, then the leukocyte
    entry (with band and segmented percentages appended when found), then
    every other analyte in the order of _CAMPOS. Band and segmented values
    are only ever shown as part of the leukocyte entry.

    Returns "Nenhum dado encontrado." when nothing matches.
    """
    partes = []

    eas_leuc = _RE_EAS_LEUC.search(texto)
    eas_hem = _RE_EAS_HEM.search(texto)
    eas_bact = _RE_EAS_BACT.search(texto)
    if eas_leuc and eas_hem and eas_bact:
        partes.append(
            f"EAS: {eas_leuc.group(1)} LEUCÓCITOS + {eas_hem.group(1)} HEMÁCIAS + BACTÉRIAS {eas_bact.group(1).upper()} //"
        )

    # Raw tokens are kept so each one is normalized exactly once, by the
    # formatter that suits it.
    brutos = {}
    for rotulo, regex in _RE_CAMPOS:
        bruto = _capturar(regex, texto)
        if bruto is not None:
            brutos[rotulo] = bruto

    if "LEUCO" in brutos:
        combo = f"LEUCO {_k_format(brutos['LEUCO'])}"
        for rotulo in _DIFERENCIAL:
            if rotulo in brutos:
                combo += f" + {_normalizar(brutos[rotulo])}% {rotulo}"
        partes.append(combo)

    for rotulo, _ in _CAMPOS:
        if rotulo == "LEUCO" or rotulo in _DIFERENCIAL:
            continue
        if rotulo in brutos:
            partes.append(f"{rotulo} {_normalizar(brutos[rotulo])}")

    return " / ".join(partes) if partes else "Nenhum dado encontrado."


def extrair_exames_formatado(pdf_file):
    """Extract lab results from an uploaded PDF as one compact line.

    Args:
        pdf_file: None, a filesystem path given as a str, or an object whose
            ``name`` attribute holds the path of the uploaded file.

    Returns:
        The summary produced by formatar_exames, or
        "Nenhum arquivo enviado." when pdf_file is None.
    """
    if pdf_file is None:
        return "Nenhum arquivo enviado."

    # A plain str is already a path; anything else is expected to expose it
    # through ``.name`` like a temporary-file wrapper does.
    caminho = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    # The context manager closes the document even if text extraction fails.
    with fitz.open(caminho) as doc:
        texto = "".join(page.get_text() for page in doc)

    return formatar_exames(texto)
|
99 |
+
|
100 |
+
# Gradio front end: a PDF upload and a button side by side, with a text box
# underneath that shows the extracted summary.
# NOTE(review): the nesting below was reconstructed from a flattened diff
# view that lost the original indentation — confirm that the text box and
# the click handler belong directly under Blocks rather than inside the Row.
with gr.Blocks() as demo:
    gr.Markdown(value="## Extrator de Exames - PDF Clínico Compactado")

    with gr.Row():
        pdf = gr.File(file_types=[".pdf"], label="PDF de exames")
        btn = gr.Button(value="Extrair")

    resultado = gr.Textbox(lines=4, label="Exames extraídos")

    # Clicking the button sends the uploaded file to the extractor and writes
    # the returned string into the text box.
    btn.click(extrair_exames_formatado, inputs=pdf, outputs=resultado)

# Start the web server as soon as the module is executed.
demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
gradio>=4.44.1
|
2 |
+
PyMuPDF
|