GABRIELSZK committed on
Commit
3b20acd
·
verified ·
1 Parent(s): 2f8b152

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +20 -0
  2. app.py +109 -0
  3. requirements.txt +2 -0
README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Extrator de Exames Laboratoriais
3
+ emoji: 🧪
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: "4.21.0"
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # Extrator de Exames Laboratoriais (PDF → Texto Compacto)
13
+
14
+ Este app Gradio permite carregar um PDF com resultados laboratoriais e extrai automaticamente os dados mais relevantes (HB, HT, Leuco, PCR, CKMB, etc.) no formato compacto.
15
+
16
+ **Exemplo de saída:**
17
+
18
+ `LEUCO 9.1K + 1% B + 66% SS / HB 14.5 / HT 41.5 / PLT 215 / ...`
19
+
20
+ A ferramenta é tolerante a diferentes formatações, títulos e layouts de PDFs clínicos.
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz
2
+ import re
3
+ import gradio as gr
4
+
5
# A numeric token: digit groups optionally joined by "." or ",", e.g. "14,5",
# "9.100" or "0.03". Requiring a digit at both ends keeps stray punctuation
# (a lone "." or "," after the analyte name) from being captured as a value.
_NUMERO = r"\d+(?:[.,]\d+)*"

# "9.100" / "1.234.567": dots used purely as thousands separators.
_MILHAR = re.compile(r"\d{1,3}(?:\.\d{3})+")

# Labels that are merged into the single "LEUCO ..." entry and therefore never
# emitted on their own.
_ROTULOS_LEUCOGRAMA = ("LEUCO", "B", "SS")


def _apos_espaco(nome):
    """Build a pattern: analyte name, whitespace (newlines included), number."""
    return rf"{nome}\s+({_NUMERO})"


def _na_mesma_linha(nome, prefixo=""):
    """Build a pattern: analyte name, then the first number later on that line.

    ``prefixo`` is an optional regex fragment captured together with the
    number (used for comparison signs such as "<").
    """
    return rf"{nome}.*?({prefixo}{_NUMERO})"


# (output label, search pattern) pairs; the order here is the output order.
_CAMPOS = (
    ("LAC", _apos_espaco(r"lactato")),
    ("AMIL", _apos_espaco(r"amilase")),
    ("AC UR", _apos_espaco(r"[áa]cido[ \n]+[úu]rico")),
    ("BT", _apos_espaco(r"bilirrubina total")),
    ("BD", _apos_espaco(r"bilirrubina direta")),
    ("BI", _apos_espaco(r"bilirrubina indireta")),
    ("CAI", _apos_espaco(r"ioniz[áa]vel")),
    ("CL-", _apos_espaco(r"cl[óo]ro")),
    ("CR", _apos_espaco(r"creatinina")),
    ("FAL", _apos_espaco(r"fosfatase alcalina")),
    ("P", _apos_espaco(r"f[óo]sforo")),
    ("GGT", _apos_espaco(r"gama.?gt")),
    ("GLI", _apos_espaco(r"glicose")),
    ("HB", _apos_espaco(r"hemoglobina")),
    ("HT", _apos_espaco(r"hemat[óo]crito")),
    # NOTE(review): this can also hit an EAS "leucócitos 2-3" line if that
    # appears before the blood count in the PDF text — confirm with real PDFs.
    ("LEUCO", _apos_espaco(r"leuc[óo]citos")),
    ("B", _apos_espaco(r"bastonetes")),
    ("SS", _apos_espaco(r"segmentados")),
    ("PLT", _apos_espaco(r"plaquetas")),
    ("LIP", _apos_espaco(r"lipase")),
    ("MG++", _apos_espaco(r"magn[ée]sio")),
    ("PCR", _apos_espaco(r"PCR")),
    ("K+", _apos_espaco(r"pot[áa]ssio")),
    ("PTN", _apos_espaco(r"prote[ií]na[s]? totais?")),
    ("ALB", _apos_espaco(r"albumina")),
    ("NA+", _apos_espaco(r"s[óo]dio")),
    ("TGO", _apos_espaco(r"TGO")),
    ("TGP", _apos_espaco(r"TGP")),
    ("TAP", _na_mesma_linha(r"TAP")),
    ("INR", _na_mesma_linha(r"INR")),
    ("TTP", _na_mesma_linha(r"TTP")),
    ("RELAÇÃO", _na_mesma_linha(r"relaç[aã]o")),
    # Accept both "ureia" and the older spelling "uréia", like the other
    # accent-tolerant patterns above.
    ("UR", _apos_espaco(r"ur[ée]ia")),
    ("CPK", _na_mesma_linha(r"CPK")),
    ("CKMB", _na_mesma_linha(r"CKMB")),
    ("TROPO", _na_mesma_linha(r"tropo", prefixo=r"[<>=]?")),
)


def _ler_texto_pdf(pdf_file):
    """Return the concatenated text of every page of the uploaded PDF.

    ``pdf_file`` may be a path string or an object exposing the path as
    ``.name``. The document is closed as soon as the text has been read.
    """
    caminho = getattr(pdf_file, "name", pdf_file)
    with fitz.open(caminho) as doc:
        return "".join(page.get_text() for page in doc)


def _para_float(bruto):
    """Parse a captured number, raising ``ValueError`` if it is not numeric.

    A comma is treated as the decimal mark (dots are then thousands
    separators); without a comma, dots are thousands separators only when
    they delimit groups of exactly three digits ("9.100"), otherwise the dot
    is a decimal point ("9.1").
    """
    if "," in bruto:
        return float(bruto.replace(".", "").replace(",", "."))
    if _MILHAR.fullmatch(bruto):
        return float(bruto.replace(".", ""))
    return float(bruto)


def _k_format(bruto):
    """Format a raw count compactly: values >= 1000 become e.g. "9.1K".

    Works on the raw captured text so that a decimal comma is interpreted
    once; if the text cannot be parsed it is returned with "," -> ".".
    """
    try:
        n = _para_float(bruto)
    except ValueError:
        return bruto.replace(",", ".")
    if n >= 1000:
        return f"{round(n / 1000, 1)}K"
    return str(n)


def _resumo_eas(texto):
    """Return the urinalysis (EAS) summary, or "" unless all three parts exist."""
    leuc = re.search(r"leuc[óo]citos\s*(\d+-\d+)", texto, re.IGNORECASE)
    hem = re.search(r"hem[áa]cias\s*(\d+-\d+)", texto, re.IGNORECASE)
    bact = re.search(r"bact[ée]rias\s*(\w+)", texto, re.IGNORECASE)
    if not (leuc and hem and bact):
        return ""
    return (
        f"EAS: {leuc.group(1)} LEUCÓCITOS + {hem.group(1)} HEMÁCIAS"
        f" + BACTÉRIAS {bact.group(1).upper()} //"
    )


def _formatar_exames(texto):
    """Build the compact " / "-separated result line from extracted PDF text."""
    # Raw captures keyed by label; dict order follows _CAMPOS order.
    encontrados = {}
    for rotulo, padrao in _CAMPOS:
        achado = re.search(padrao, texto, re.IGNORECASE)
        if achado:
            encontrados[rotulo] = achado.group(1)

    saida = []
    eas = _resumo_eas(texto)
    if eas:
        saida.append(eas)

    # Leukocytes are shown together with band ("B") and segmented ("SS")
    # percentages as a single entry.
    if "LEUCO" in encontrados:
        partes = [f"LEUCO {_k_format(encontrados['LEUCO'])}"]
        for sub in ("B", "SS"):
            if sub in encontrados:
                partes.append(f"{encontrados[sub].replace(',', '.')}% {sub}")
        saida.append(" + ".join(partes))

    for rotulo, bruto in encontrados.items():
        if rotulo in _ROTULOS_LEUCOGRAMA:
            continue
        saida.append(f"{rotulo} {bruto.replace(',', '.')}")

    return " / ".join(saida) if saida else "Nenhum dado encontrado."


def extrair_exames_formatado(pdf_file):
    """Extract lab results from an uploaded PDF as one compact text line.

    Args:
        pdf_file: The uploaded file as delivered by the Gradio ``File``
            component (a path string or an object with a ``.name`` path),
            or ``None`` when nothing was uploaded.

    Returns:
        A " / "-separated string such as "LEUCO 9.1K + 1% B / HB 14.5", a
        message when no file was sent, or a message when nothing matched.

    Raises:
        Whatever ``fitz.open`` raises for an unreadable or invalid file.
    """
    if pdf_file is None:
        return "Nenhum arquivo enviado."
    return _formatar_exames(_ler_texto_pdf(pdf_file))
99
+
100
# Gradio UI: a PDF upload and an "Extrair" button side by side, with the
# extracted text shown in a textbox underneath.
with gr.Blocks() as demo:
    gr.Markdown("## Extrator de Exames - PDF Clínico Compactado")
    with gr.Row():
        pdf = gr.File(label="PDF de exames", file_types=[".pdf"])
        btn = gr.Button("Extrair")

    resultado = gr.Textbox(label="Exames extraídos", lines=4)
    # Clicking the button runs the extractor on the uploaded file.
    btn.click(fn=extrair_exames_formatado, inputs=pdf, outputs=resultado)

# Only start the server when run as a script, so importing this module
# (tests, tooling) does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio>=4.44.1
2
+ PyMuPDF