GABRIELSZK committed on
Commit
8866ba3
·
verified ·
1 Parent(s): 3b20acd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -78
app.py CHANGED
@@ -6,12 +6,12 @@ def extrair_exames_formatado(pdf_file):
6
  if pdf_file is None:
7
  return "Nenhum arquivo enviado."
8
 
9
- doc = fitz.open(pdf_file.name)
10
  texto = ""
11
- for page in doc:
12
- texto += page.get_text()
 
13
 
14
- def extrair_valor(padrao):
15
  match = re.search(padrao, texto, re.IGNORECASE)
16
  if match:
17
  return match.group(1).replace(",", ".").strip()
@@ -20,90 +20,89 @@ def extrair_exames_formatado(pdf_file):
20
  def k_format(v):
21
  try:
22
  n = float(v.replace(".", "").replace(",", "."))
23
- if n >= 1000:
24
- return f"{round(n / 1000, 1)}K"
25
- return str(n)
26
  except:
27
  return v
28
 
29
- eas_leuc = re.search(r"leucócitos[\s\n]*(\d+-\d+)", texto, re.IGNORECASE)
30
- eas_hem = re.search(r"hem[áa]cias[\s\n]*(\d+-\d+)", texto, re.IGNORECASE)
31
- eas_bact = re.search(r"bact[ée]rias[\s\n]*(\w+)", texto, re.IGNORECASE)
32
- eas_final = ""
33
- if eas_leuc and eas_hem and eas_bact:
34
- eas_final = f"EAS: {eas_leuc.group(1)} LEUCÓCITOS + {eas_hem.group(1)} HEMÁCIAS + BACTÉRIAS {eas_bact.group(1).upper()} //"
35
 
36
- campos = [
37
- ("LAC", r"lactato[\s\n]+([\d,\.]+)"),
38
- ("AMIL", r"amilase[\s\n]+([\d,\.]+)"),
39
- ("AC UR", r"[áa]cido[ \n]+[úu]rico[\s\n]+([\d,\.]+)"),
40
- ("BT", r"bilirrubina total[\s\n]+([\d,\.]+)"),
41
- ("BD", r"bilirrubina direta[\s\n]+([\d,\.]+)"),
42
- ("BI", r"bilirrubina indireta[\s\n]+([\d,\.]+)"),
43
- ("CAI", r"ioniz[áa]vel[\s\n]+([\d,\.]+)"),
44
- ("CL-", r"cl[óo]ro[\s\n]+([\d,\.]+)"),
45
- ("CR", r"creatinina[\s\n]+([\d,\.]+)"),
46
- ("FAL", r"fosfatase alcalina[\s\n]+([\d,\.]+)"),
47
- ("P", r"f[óo]sforo[\s\n]+([\d,\.]+)"),
48
- ("GGT", r"gama.?gt[\s\n]+([\d,\.]+)"),
49
- ("GLI", r"glicose[\s\n]+([\d,\.]+)"),
50
- ("HB", r"hemoglobina[\s\n]+([\d,\.]+)"),
51
- ("HT", r"hemat[óo]crito[\s\n]+([\d,\.]+)"),
52
- ("LEUCO", r"leuc[óo]citos[\s\n]+([\d,\.]+)"),
53
- ("B", r"bastonetes[\s\n]+([\d,\.]+)"),
54
- ("SS", r"segmentados[\s\n]+([\d,\.]+)"),
55
- ("PLT", r"plaquetas[\s\n]+([\d,\.]+)"),
56
- ("LIP", r"lipase[\s\n]+([\d,\.]+)"),
57
- ("MG++", r"magn[ée]sio[\s\n]+([\d,\.]+)"),
58
- ("PCR", r"PCR[\s\n]+([\d,\.]+)"),
59
- ("K+", r"pot[áa]ssio[\s\n]+([\d,\.]+)"),
60
- ("PTN", r"prote[ií]na[s]? totais?[\s\n]+([\d,\.]+)"),
61
- ("ALB", r"albumina[\s\n]+([\d,\.]+)"),
62
- ("NA+", r"s[óo]dio[\s\n]+([\d,\.]+)"),
63
- ("TGO", r"TGO[\s\n]+([\d,\.]+)"),
64
- ("TGP", r"TGP[\s\n]+([\d,\.]+)"),
65
- ("TAP", r"TAP.*?([\d,\.]+)"),
66
- ("INR", r"INR.*?([\d,\.]+)"),
67
- ("TTP", r"TTP.*?([\d,\.]+)"),
68
- ("RELAÇÃO", r"relaç[aã]o.*?([\d,\.]+)"),
69
- ("UR", r"ureia[\s\n]+([\d,\.]+)"),
70
- ("CPK", r"CPK.*?([\d,\.]+)"),
71
- ("CKMB", r"CKMB.*?([\d,\.]+)"),
72
- ("TROPO", r"tropo.*?([<>=]?[\d,\.]+)")
73
- ]
74
 
75
- output = [eas_final] if eas_final else []
76
- temp_dict = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
- for rotulo, regex in campos:
79
- val = extrair_valor(regex)
80
- if val:
81
- temp_dict[rotulo] = val
 
 
 
 
 
 
82
 
83
- if "LEUCO" in temp_dict:
84
- l = k_format(temp_dict["LEUCO"])
85
- b = f"{temp_dict['B']}% B" if "B" in temp_dict else ""
86
- ss = f"{temp_dict['SS']}% SS" if "SS" in temp_dict else ""
87
- combo = f"LEUCO {l}" + (f" + {b}" if b else "") + (f" + {ss}" if ss else "")
88
- output.append(combo)
89
- temp_dict.pop("LEUCO")
90
- temp_dict.pop("B", None)
91
- temp_dict.pop("SS", None)
92
-
93
- ordem = [k for k, _ in campos if k not in ["LEUCO", "B", "SS"]]
94
- for campo in ordem:
95
- if campo in temp_dict:
96
- output.append(f"{campo} {temp_dict[campo]}")
97
-
98
- return " / ".join(output) if output else "Nenhum dado encontrado."
99
 
 
100
  with gr.Blocks() as demo:
101
- gr.Markdown("## Extrator de Exames - PDF Clínico Compactado")
102
  with gr.Row():
103
- pdf = gr.File(label="PDF de exames", file_types=[".pdf"])
104
- btn = gr.Button("Extrair")
105
-
106
- resultado = gr.Textbox(label="Exames extraídos", lines=4)
107
  btn.click(fn=extrair_exames_formatado, inputs=pdf, outputs=resultado)
108
 
109
  demo.launch()
 
6
  if pdf_file is None:
7
  return "Nenhum arquivo enviado."
8
 
 
9
  texto = ""
10
+ with fitz.open(pdf_file.name) as doc:
11
+ for page in doc:
12
+ texto += page.get_text()
13
 
14
+ def buscar(padrao):
15
  match = re.search(padrao, texto, re.IGNORECASE)
16
  if match:
17
  return match.group(1).replace(",", ".").strip()
 
20
  def k_format(v):
21
  try:
22
  n = float(v.replace(".", "").replace(",", "."))
23
+ return f"{round(n / 1000, 1)}K" if n >= 1000 else str(n)
 
 
24
  except:
25
  return v
26
 
27
+ # Coleta combinada de Leucócitos, Bastonetes e Segmentados
28
+ leuco = buscar(r"leuc[óo]citos\s+([\d.,]+)")
29
+ bastonetes = buscar(r"bastonetes\s+(\d+)\s*%")
30
+ segmentados = buscar(r"segmentados\s+(\d+)\s*%")
 
 
31
 
32
+ leuco_str = ""
33
+ if leuco:
34
+ leuco_str = f"LEUCO {k_format(leuco)}"
35
+ if bastonetes:
36
+ leuco_str += f" + {bastonetes}% B"
37
+ if segmentados:
38
+ leuco_str += f" + {segmentados}% SS"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
+ exames = {
41
+ "🟠 Renal / Eletrólitos": {
42
+ "UREIA": buscar(r"ureia\s+([\d.,]+)"),
43
+ "CR": buscar(r"creatinina\s+([\d.,]+)"),
44
+ "K+": buscar(r"pot[áa]ssio\s+([\d.,]+)"),
45
+ "NA+": buscar(r"s[óo]dio\s+([\d.,]+)"),
46
+ "CL-": buscar(r"cl[óo]ro\s+([\d.,]+)"),
47
+ "CAI": buscar(r"ioniz[áa]vel\s+([\d.,]+)"),
48
+ "CA TOTAL": buscar(r"c[áa]lcio total\s+([\d.,]+)"),
49
+ "MG++": buscar(r"magn[ée]sio\s+([\d.,]+)"),
50
+ "FÓS": buscar(r"f[óo]sforo\s+([\d.,]+)"),
51
+ },
52
+ "🟡 Hepático": {
53
+ "BT": buscar(r"bilirrubina total\s+([\d.,]+)"),
54
+ "BD": buscar(r"bilirrubina direta\s+([\d.,]+)"),
55
+ "BI": buscar(r"bilirrubina indireta\s+([\d.,]+)"),
56
+ "TGO": buscar(r"TGO\s+([\d.,]+)"),
57
+ "TGP": buscar(r"TGP\s+([\d.,]+)"),
58
+ "GGT": buscar(r"gama.?gt\s+([\d.,]+)"),
59
+ "FAL": buscar(r"fosfatase alcalina\s+([\d.,]+)"),
60
+ "ALB": buscar(r"albumina\s+([\d.,]+)"),
61
+ "PTN TOTAL": buscar(r"prote[ií]nas totais\s+([\d.,]+)"),
62
+ "GLOB": buscar(r"globulina\s+([\d.,]+)"),
63
+ "RELAÇÃO": buscar(r"relaç[aã]o A/G\s+([\d.,]+)")
64
+ },
65
+ "🔴 Hematológico": {
66
+ "HB": buscar(r"hemoglobina\s+([\d.,]+)"),
67
+ "HT": buscar(r"hemat[óo]crito\s+([\d.,]+)"),
68
+ "PLT": buscar(r"plaquetas\s+([\d.,]+)")
69
+ },
70
+ "🔵 Coagulação": {
71
+ "TAP": buscar(r"TEMPO\s+14,4|TAP.*?([\d.,]+)"),
72
+ "INR": buscar(r"INR\s*[:\-]?\s*([\d.,]+)"),
73
+ "TTP": buscar(r"TTPA.*?tempo\s+([\d.,]+)"),
74
+ "RELAÇÃO": buscar(r"relaç[aã]o.*?([\d.,]+)")
75
+ },
76
+ "🟢 Metabólico": {
77
+ "GLI": buscar(r"glicose\s+([\d.,]+)"),
78
+ "LIP": buscar(r"lipase\s+([\d.,]+)"),
79
+ "AMIL": buscar(r"amilase\s+([\d.,]+)"),
80
+ "AC UR": buscar(r"[áa]cido[ \n]+[úu]rico\s+([\d.,]+)"),
81
+ "LAC": buscar(r"lactato.*?([\d.,]+)"),
82
+ "PCR": buscar(r"PCR.*?([\d.,]+)")
83
+ }
84
+ }
85
 
86
+ output = []
87
+ if leuco_str:
88
+ output.append(leuco_str)
89
+ for sistema, dados in exames.items():
90
+ linha = []
91
+ for k, v in dados.items():
92
+ if v:
93
+ linha.append(f"{k}: {v}")
94
+ if linha:
95
+ output.append(f"{sistema} → " + " / ".join(linha))
96
 
97
+ return "\n".join(output) if output else "Nenhum dado encontrado."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
+ # Gradio Interface
100
  with gr.Blocks() as demo:
101
+ gr.Markdown("## 🧪 Extrator de Exames - PDF Clínico Compactado")
102
  with gr.Row():
103
+ pdf = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
104
+ btn = gr.Button("🔍 Extrair Exames")
105
+ resultado = gr.Textbox(label="📋 Resultados Extraídos", lines=20)
 
106
  btn.click(fn=extrair_exames_formatado, inputs=pdf, outputs=resultado)
107
 
108
  demo.launch()