Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,12 +6,12 @@ def extrair_exames_formatado(pdf_file):
|
|
6 |
if pdf_file is None:
|
7 |
return "Nenhum arquivo enviado."
|
8 |
|
9 |
-
doc = fitz.open(pdf_file.name)
|
10 |
texto = ""
|
11 |
-
|
12 |
-
|
|
|
13 |
|
14 |
-
def
|
15 |
match = re.search(padrao, texto, re.IGNORECASE)
|
16 |
if match:
|
17 |
return match.group(1).replace(",", ".").strip()
|
@@ -20,90 +20,89 @@ def extrair_exames_formatado(pdf_file):
|
|
20 |
def k_format(v):
|
21 |
try:
|
22 |
n = float(v.replace(".", "").replace(",", "."))
|
23 |
-
if n >= 1000
|
24 |
-
return f"{round(n / 1000, 1)}K"
|
25 |
-
return str(n)
|
26 |
except:
|
27 |
return v
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
if eas_leuc and eas_hem and eas_bact:
|
34 |
-
eas_final = f"EAS: {eas_leuc.group(1)} LEUCÓCITOS + {eas_hem.group(1)} HEMÁCIAS + BACTÉRIAS {eas_bact.group(1).upper()} //"
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
("CAI", r"ioniz[áa]vel[\s\n]+([\d,\.]+)"),
|
44 |
-
("CL-", r"cl[óo]ro[\s\n]+([\d,\.]+)"),
|
45 |
-
("CR", r"creatinina[\s\n]+([\d,\.]+)"),
|
46 |
-
("FAL", r"fosfatase alcalina[\s\n]+([\d,\.]+)"),
|
47 |
-
("P", r"f[óo]sforo[\s\n]+([\d,\.]+)"),
|
48 |
-
("GGT", r"gama.?gt[\s\n]+([\d,\.]+)"),
|
49 |
-
("GLI", r"glicose[\s\n]+([\d,\.]+)"),
|
50 |
-
("HB", r"hemoglobina[\s\n]+([\d,\.]+)"),
|
51 |
-
("HT", r"hemat[óo]crito[\s\n]+([\d,\.]+)"),
|
52 |
-
("LEUCO", r"leuc[óo]citos[\s\n]+([\d,\.]+)"),
|
53 |
-
("B", r"bastonetes[\s\n]+([\d,\.]+)"),
|
54 |
-
("SS", r"segmentados[\s\n]+([\d,\.]+)"),
|
55 |
-
("PLT", r"plaquetas[\s\n]+([\d,\.]+)"),
|
56 |
-
("LIP", r"lipase[\s\n]+([\d,\.]+)"),
|
57 |
-
("MG++", r"magn[ée]sio[\s\n]+([\d,\.]+)"),
|
58 |
-
("PCR", r"PCR[\s\n]+([\d,\.]+)"),
|
59 |
-
("K+", r"pot[áa]ssio[\s\n]+([\d,\.]+)"),
|
60 |
-
("PTN", r"prote[ií]na[s]? totais?[\s\n]+([\d,\.]+)"),
|
61 |
-
("ALB", r"albumina[\s\n]+([\d,\.]+)"),
|
62 |
-
("NA+", r"s[óo]dio[\s\n]+([\d,\.]+)"),
|
63 |
-
("TGO", r"TGO[\s\n]+([\d,\.]+)"),
|
64 |
-
("TGP", r"TGP[\s\n]+([\d,\.]+)"),
|
65 |
-
("TAP", r"TAP.*?([\d,\.]+)"),
|
66 |
-
("INR", r"INR.*?([\d,\.]+)"),
|
67 |
-
("TTP", r"TTP.*?([\d,\.]+)"),
|
68 |
-
("RELAÇÃO", r"relaç[aã]o.*?([\d,\.]+)"),
|
69 |
-
("UR", r"ureia[\s\n]+([\d,\.]+)"),
|
70 |
-
("CPK", r"CPK.*?([\d,\.]+)"),
|
71 |
-
("CKMB", r"CKMB.*?([\d,\.]+)"),
|
72 |
-
("TROPO", r"tropo.*?([<>=]?[\d,\.]+)")
|
73 |
-
]
|
74 |
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
-
|
84 |
-
l = k_format(temp_dict["LEUCO"])
|
85 |
-
b = f"{temp_dict['B']}% B" if "B" in temp_dict else ""
|
86 |
-
ss = f"{temp_dict['SS']}% SS" if "SS" in temp_dict else ""
|
87 |
-
combo = f"LEUCO {l}" + (f" + {b}" if b else "") + (f" + {ss}" if ss else "")
|
88 |
-
output.append(combo)
|
89 |
-
temp_dict.pop("LEUCO")
|
90 |
-
temp_dict.pop("B", None)
|
91 |
-
temp_dict.pop("SS", None)
|
92 |
-
|
93 |
-
ordem = [k for k, _ in campos if k not in ["LEUCO", "B", "SS"]]
|
94 |
-
for campo in ordem:
|
95 |
-
if campo in temp_dict:
|
96 |
-
output.append(f"{campo} {temp_dict[campo]}")
|
97 |
-
|
98 |
-
return " / ".join(output) if output else "Nenhum dado encontrado."
|
99 |
|
|
|
100 |
with gr.Blocks() as demo:
|
101 |
-
gr.Markdown("## Extrator de Exames - PDF Clínico Compactado")
|
102 |
with gr.Row():
|
103 |
-
pdf = gr.File(label="PDF de exames", file_types=[".pdf"])
|
104 |
-
btn = gr.Button("Extrair")
|
105 |
-
|
106 |
-
resultado = gr.Textbox(label="Exames extraídos", lines=4)
|
107 |
btn.click(fn=extrair_exames_formatado, inputs=pdf, outputs=resultado)
|
108 |
|
109 |
demo.launch()
|
|
|
6 |
if pdf_file is None:
|
7 |
return "Nenhum arquivo enviado."
|
8 |
|
|
|
9 |
texto = ""
|
10 |
+
with fitz.open(pdf_file.name) as doc:
|
11 |
+
for page in doc:
|
12 |
+
texto += page.get_text()
|
13 |
|
14 |
+
def buscar(padrao):
    """Search the extracted PDF text and return the first captured value.

    The search runs case-insensitively over ``texto`` (the text collected
    from the PDF pages by the enclosing code).

    Args:
        padrao: Regular expression whose group 1 captures the value.

    Returns:
        The text of group 1 with "," replaced by "." and surrounding
        whitespace stripped. When nothing usable was captured, control
        falls through past this statement (an implicit ``None`` unless
        code following this span returns something else).
    """
    match = re.search(padrao, texto, re.IGNORECASE)
    # With an alternation such as "A|B(...)" the pattern can match through
    # a branch that does not contain group 1; group(1) is then None and the
    # previous unconditional .replace() raised AttributeError.
    valor = match.group(1) if match else None
    if valor is not None:
        return valor.replace(",", ".").strip()
|
|
|
20 |
def k_format(v):
    """Format a numeric string compactly, abbreviating thousands with "K".

    Every "." in ``v`` is removed (read as a thousands separator) and a ","
    is read as the decimal mark before the text is parsed as a float.

    Args:
        v: Numeric text such as "12.500" or "7,5".

    Returns:
        ``"<value / 1000 rounded to one decimal>K"`` when the parsed value
        is at least 1000, otherwise ``str()`` of the parsed float. If ``v``
        cannot be parsed it is returned unchanged.
    """
    # NOTE(review): in this file the argument comes from buscar(), which has
    # already turned "," into "."; a decimal value such as "7.5" would have
    # its point stripped here and be read as 75 - confirm this is intended.
    try:
        n = float(v.replace(".", "").replace(",", "."))
    except (AttributeError, TypeError, ValueError):
        # Best-effort formatter: hand back the input when it is not numeric
        # text, without also swallowing KeyboardInterrupt/SystemExit the way
        # a bare "except:" would.
        return v
    return f"{round(n / 1000, 1)}K" if n >= 1000 else str(n)
|
26 |
|
27 |
+
# Coleta combinada de Leucócitos, Bastonetes e Segmentados
|
28 |
+
leuco = buscar(r"leuc[óo]citos\s+([\d.,]+)")
|
29 |
+
bastonetes = buscar(r"bastonetes\s+(\d+)\s*%")
|
30 |
+
segmentados = buscar(r"segmentados\s+(\d+)\s*%")
|
|
|
|
|
31 |
|
32 |
+
leuco_str = ""
|
33 |
+
if leuco:
|
34 |
+
leuco_str = f"LEUCO {k_format(leuco)}"
|
35 |
+
if bastonetes:
|
36 |
+
leuco_str += f" + {bastonetes}% B"
|
37 |
+
if segmentados:
|
38 |
+
leuco_str += f" + {segmentados}% SS"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
+
exames = {
|
41 |
+
"🟠 Renal / Eletrólitos": {
|
42 |
+
"UREIA": buscar(r"ureia\s+([\d.,]+)"),
|
43 |
+
"CR": buscar(r"creatinina\s+([\d.,]+)"),
|
44 |
+
"K+": buscar(r"pot[áa]ssio\s+([\d.,]+)"),
|
45 |
+
"NA+": buscar(r"s[óo]dio\s+([\d.,]+)"),
|
46 |
+
"CL-": buscar(r"cl[óo]ro\s+([\d.,]+)"),
|
47 |
+
"CAI": buscar(r"ioniz[áa]vel\s+([\d.,]+)"),
|
48 |
+
"CA TOTAL": buscar(r"c[áa]lcio total\s+([\d.,]+)"),
|
49 |
+
"MG++": buscar(r"magn[ée]sio\s+([\d.,]+)"),
|
50 |
+
"FÓS": buscar(r"f[óo]sforo\s+([\d.,]+)"),
|
51 |
+
},
|
52 |
+
"🟡 Hepático": {
|
53 |
+
"BT": buscar(r"bilirrubina total\s+([\d.,]+)"),
|
54 |
+
"BD": buscar(r"bilirrubina direta\s+([\d.,]+)"),
|
55 |
+
"BI": buscar(r"bilirrubina indireta\s+([\d.,]+)"),
|
56 |
+
"TGO": buscar(r"TGO\s+([\d.,]+)"),
|
57 |
+
"TGP": buscar(r"TGP\s+([\d.,]+)"),
|
58 |
+
"GGT": buscar(r"gama.?gt\s+([\d.,]+)"),
|
59 |
+
"FAL": buscar(r"fosfatase alcalina\s+([\d.,]+)"),
|
60 |
+
"ALB": buscar(r"albumina\s+([\d.,]+)"),
|
61 |
+
"PTN TOTAL": buscar(r"prote[ií]nas totais\s+([\d.,]+)"),
|
62 |
+
"GLOB": buscar(r"globulina\s+([\d.,]+)"),
|
63 |
+
"RELAÇÃO": buscar(r"relaç[aã]o A/G\s+([\d.,]+)")
|
64 |
+
},
|
65 |
+
"🔴 Hematológico": {
|
66 |
+
"HB": buscar(r"hemoglobina\s+([\d.,]+)"),
|
67 |
+
"HT": buscar(r"hemat[óo]crito\s+([\d.,]+)"),
|
68 |
+
"PLT": buscar(r"plaquetas\s+([\d.,]+)")
|
69 |
+
},
|
70 |
+
"🔵 Coagulação": {
|
71 |
+
"TAP": buscar(r"TEMPO\s+14,4|TAP.*?([\d.,]+)"),
|
72 |
+
"INR": buscar(r"INR\s*[:\-]?\s*([\d.,]+)"),
|
73 |
+
"TTP": buscar(r"TTPA.*?tempo\s+([\d.,]+)"),
|
74 |
+
"RELAÇÃO": buscar(r"relaç[aã]o.*?([\d.,]+)")
|
75 |
+
},
|
76 |
+
"🟢 Metabólico": {
|
77 |
+
"GLI": buscar(r"glicose\s+([\d.,]+)"),
|
78 |
+
"LIP": buscar(r"lipase\s+([\d.,]+)"),
|
79 |
+
"AMIL": buscar(r"amilase\s+([\d.,]+)"),
|
80 |
+
"AC UR": buscar(r"[áa]cido[ \n]+[úu]rico\s+([\d.,]+)"),
|
81 |
+
"LAC": buscar(r"lactato.*?([\d.,]+)"),
|
82 |
+
"PCR": buscar(r"PCR.*?([\d.,]+)")
|
83 |
+
}
|
84 |
+
}
|
85 |
|
86 |
+
output = []
|
87 |
+
if leuco_str:
|
88 |
+
output.append(leuco_str)
|
89 |
+
for sistema, dados in exames.items():
|
90 |
+
linha = []
|
91 |
+
for k, v in dados.items():
|
92 |
+
if v:
|
93 |
+
linha.append(f"{k}: {v}")
|
94 |
+
if linha:
|
95 |
+
output.append(f"{sistema} → " + " / ".join(linha))
|
96 |
|
97 |
+
return "\n".join(output) if output else "Nenhum dado encontrado."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
+
# Gradio Interface
|
100 |
with gr.Blocks() as demo:
|
101 |
+
gr.Markdown("## 🧪 Extrator de Exames - PDF Clínico Compactado")
|
102 |
with gr.Row():
|
103 |
+
pdf = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
|
104 |
+
btn = gr.Button("🔍 Extrair Exames")
|
105 |
+
resultado = gr.Textbox(label="📋 Resultados Extraídos", lines=20)
|
|
|
106 |
btn.click(fn=extrair_exames_formatado, inputs=pdf, outputs=resultado)
|
107 |
|
108 |
demo.launch()
|