GABRIELSZK committed on
Commit
95b85d7
·
verified ·
1 Parent(s): 4b62dc4

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -124
app.py DELETED
@@ -1,124 +0,0 @@
1
-
2
- import fitz
3
- import re
4
- import gradio as gr
5
- import pandas as pd
6
- import tempfile
7
- import pytesseract
8
- from PIL import Image, ImageEnhance, ImageFilter
9
- import io
10
-
11
def classificar(nome, valor):
    """Flag an exam result that falls outside its reference range.

    Args:
        nome: Exam abbreviation used as the key into the reference table
            below (e.g. ``"HB"``, ``"K+"``).
        valor: Raw result text. The characters ``"K"``, ``">"`` and ``"<"``
            are removed before the numeric conversion.

    Returns:
        ``f"{valor} ↓"`` when the number is below the range minimum,
        ``f"{valor} ↑"`` when it is above the maximum, and ``valor``
        unchanged otherwise (in range, unknown exam name, or a value that
        cannot be converted to a number).
    """
    # (minimum, maximum) reference limits per exam abbreviation.
    faixas = {
        "HB": (12, 17), "HT": (36, 50), "GLI": (70, 99), "UREIA": (10, 50),
        "CR": (0.6, 1.3), "K+": (3.5, 5.5), "NA+": (135, 145), "TGO": (0, 40),
        "TGP": (0, 40), "ALB": (3.5, 5.0), "INR": (0.8, 1.2), "TAP": (10, 14),
        "TTP": (25, 35), "LAC": (0.5, 2.2), "PLT": (150, 450), "LEUCO": (4, 11),
        "CKMB": (0, 24), "CPK": (0, 190), "TROPO": (0, 0.04), "AMIL": (28, 100),
        "LIP": (0, 60), "PCR": (0, 1), "ÁC UR": (3.5, 7.2), "FAL": (44, 147),
        "GGT": (0, 38), "FÓS": (2.5, 4.5), "MG++": (1.6, 2.6), "CA TOTAL": (8.6, 10.2),
        "CAI": (1.1, 1.35), "BT": (0.2, 1.2), "BD": (0, 0.4), "BI": (0.1, 0.8), "CL-": (96, 106),
    }
    try:
        val = float(valor.replace("K", "").replace(">", "").replace("<", "").strip())
    except (AttributeError, TypeError, ValueError):
        # Non-numeric text (e.g. the "—" placeholder) or a non-string input:
        # hand the value back untouched instead of failing the whole report.
        return valor

    limites = faixas.get(nome)
    if limites is None:
        return valor

    min_v, max_v = limites
    if val < min_v:
        return f"{valor} ↓"
    if val > max_v:
        return f"{valor} ↑"
    return valor
33
-
34
def melhorar_imagem(img):
    """Preprocess a page image: mode 'L' conversion, contrast x2, sharpen.

    Args:
        img: Image object exposing ``convert`` and ``filter`` (a PIL image
            in this file).

    Returns:
        The processed image after the three steps have been applied in order.
    """
    cinza = img.convert('L')
    contrastada = ImageEnhance.Contrast(cinza).enhance(2)
    return contrastada.filter(ImageFilter.SHARPEN)
39
-
40
def limpar_texto(texto):
    """Join spaced-out single capital letters and collapse whitespace.

    Args:
        texto: Raw text extracted from the PDF.

    Returns:
        The text with runs of isolated capital letters merged
        (``"C P K"`` becomes ``"CPK"``) and every whitespace run replaced
        by a single space.
    """
    # The second letter is matched with a lookahead so it is not consumed and
    # can start the next pair; consuming it would leave "C P K" as "CP K".
    texto = re.sub(r'\b([A-Z])\s+(?=[A-Z]\b)', r'\1', texto)
    return re.sub(r'\s+', ' ', texto)
43
-
44
def extrair_texto_pdf(pdf_file):
    """Extract text from a PDF both from its text layer and through OCR.

    Args:
        pdf_file: Either an object exposing the file path as ``.name`` or the
            path string itself.

    Returns:
        A ``(texto_fitz, texto_ocr)`` tuple: the cleaned text returned by
        ``page.get_text()`` for all pages, and the cleaned OCR text of every
        page rendered at 400 dpi.
    """
    # Accept both a file wrapper with ``.name`` and a plain path string.
    caminho = getattr(pdf_file, "name", pdf_file)

    textos_paginas = []
    ocr_paginas = []
    with fitz.open(caminho) as doc:
        for page in doc:
            textos_paginas.append(page.get_text())
            pix = page.get_pixmap(dpi=400)
            img = Image.open(io.BytesIO(pix.tobytes("png")))
            # OCR each page right away so only one rendered image is kept
            # in memory at a time.
            ocr_paginas.append(pytesseract.image_to_string(melhorar_imagem(img)))

    texto_fitz = limpar_texto("".join(textos_paginas))
    texto_ocr = limpar_texto(" ".join(ocr_paginas))
    return texto_fitz, texto_ocr
56
-
57
def buscar_exame(textos, padrao):
    """Return the first captured value for *padrao* found in *textos*.

    Args:
        textos: Iterable of text sources, searched in order.
        padrao: Regular expression whose group 1 captures the value; it is
            applied case-insensitively.

    Returns:
        The captured value with ``","`` replaced by ``"."`` and surrounding
        whitespace stripped, or ``None`` when no match in any text captured
        a value.
    """
    for texto in textos:
        for match in re.finditer(padrao, texto, re.IGNORECASE):
            valor = match.group(1)
            # Group 1 is None when the match came from an alternative that
            # does not contain the capture group; keep looking instead of
            # failing on ``None.replace``.
            if valor is not None:
                return valor.replace(",", ".").strip()
    return None
63
-
64
def extrair_exames_formatado(pdf_file):
    """Extract the known lab exams from a PDF and format them for display.

    Args:
        pdf_file: Uploaded PDF as passed on to ``extrair_texto_pdf``; a falsy
            value means nothing was uploaded.

    Returns:
        A ``(texto, caminho_csv)`` tuple: one ``"EXAME: valor"`` line per
        exam, and the path of a temporary CSV file with the same data.
        When no file was given, returns
        ``("Nenhum arquivo enviado.", None)``.
    """
    if not pdf_file:
        return "Nenhum arquivo enviado.", None

    texto_fitz, texto_ocr = extrair_texto_pdf(pdf_file)
    textos = [texto_fitz, texto_ocr]  # Always consider both sources

    # Alternatives are wrapped in (?:...) so the value capture applies to
    # every alternative, and bare abbreviations get \b so they do not match
    # inside longer words (e.g. "bi" inside "bilirrubina").
    exames = {
        "AMIL": r"amilase[^\d]{0,10}([\d.,]+)",
        "ÁC UR": r"ácido[\s]?úrico[^\d]{0,10}([\d.,]+)",
        "BT": r"(?:bilirrubina total|\bbt\b)[^\d]{0,10}([\d.,]+)",
        "BD": r"(?:bilirrubina direta|\bbd\b)[^\d]{0,10}([\d.,]+)",
        "BI": r"(?:bilirrubina indireta|\bbi\b)[^\d]{0,10}([\d.,]+)",
        "CAI": r"(?:cálcio ionizável|\bcai\b)[^\d]{0,10}([\d.,]+)",
        "CA TOTAL": r"cálcio total[^\d]{0,10}([\d.,]+)",
        "CL-": r"cloro[^\d]{0,10}([\d.,]+)",
        "CR": r"creatinina[^\d]{0,10}([\d.,]+)",
        "FAL": r"(?:fosfatase alcalina|\bfal\b)[^\d]{0,10}([\d.,]+)",
        "FÓS": r"f[óo]sforo[^\d]{0,10}([\d.,]+)",
        "GGT": r"(?:gama.*?gt|\bggt\b)[^\d]{0,10}([\d.,]+)",
        "GLI": r"glicose[^\d]{0,10}([\d.,]+)",
        "LIP": r"lipase[^\d]{0,10}([\d.,]+)",
        "MG++": r"magn[ée]sio[^\d]{0,10}([\d.,]+)",
        "PCR": r"pcr[^\d]{0,10}([\d.,]+)",
        "K+": r"pot[áa]ssio[^\d]{0,10}([\d.,]+)",
        "PTN": r"proteínas totais[^\d]{0,10}([\d.,]+)",
        "ALB": r"albumina[^\d]{0,10}([\d.,]+)",
        "GLOB": r"globulina[^\d]{0,10}([\d.,]+)",
        "RELAÇÃO": r"relação.*?a/g[^\d]{0,10}([\d.,]+)",
        "NA+": r"s[óo]dio[^\d]{0,10}([\d.,]+)",
        "TGO": r"tgo[^\d]{0,10}([\d.,]+)",
        "TGP": r"tgp[^\d]{0,10}([\d.,]+)",
        "TAP": r"tap[^\d]{0,10}([\d.,]+)",
        "INR": r"inr[^\d]{0,10}([\d.,]+)",
        "TTP": r"ttpa[^\d]{0,10}([\d.,]+)",
        "UREIA": r"ureia[^\d]{0,10}([\d.,]+)",
        "LAC": r"lactato[^\d]{0,10}([\d.,]+)",
        "LEUCO": r"leuc[óo]citos[^\d]{0,10}([\d.,]+)",
        "HB": r"hemoglobina[^\d]{0,10}([\d.,]+)",
        "HT": r"hemat[óo]crito[^\d]{0,10}([\d.,]+)",
        "PLT": r"plaquetas[^\d]{0,10}([\d.,]+)",
        "CPK": r"(?:cpk|creatinofosfoquinase)[^\d]{0,10}([\d.,]+)",
        "CKMB": r"(?:ck[- ]?mb|ckmb massa)[^\d]{0,10}([\d.,]+)",
        "TROPO": r"(?:troponina)[^\d]{0,10}([<>]?[\d.,]+)",
    }

    resultados = []
    for exame, padrao in exames.items():
        # "—" marks an exam that was not found in either text source.
        bruto = buscar_exame(textos, padrao) or "—"
        resultados.append((exame, classificar(exame, bruto)))

    df = pd.DataFrame(resultados, columns=["Exame", "Valor"])

    # Reserve a persistent temp path, then close the handle before pandas
    # writes to it so no file descriptor is leaked.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
        caminho_csv = temp_file.name
    df.to_csv(caminho_csv, index=False)

    texto_final = "\n".join(f"{e}: {v}" for e, v in resultados)
    return texto_final, caminho_csv
116
-
117
# Gradio UI: a PDF upload, a button that runs the extraction, a text box for
# the formatted results and a file output for the generated CSV.
with gr.Blocks() as demo:
    pdf_file = gr.File(label="📄 PDF Exames")
    extract_button = gr.Button("🔍 Extrair")
    output_text = gr.Textbox(label="📋 Resultados", lines=25)
    download_button = gr.File(label="📥 CSV")
    extract_button.click(extrair_exames_formatado, pdf_file, [output_text, download_button])

# Start the server only when run as a script, so importing this module
# (e.g. to reuse the extraction helpers) has no side effects.
if __name__ == "__main__":
    demo.launch()