Spaces:

GABRIELSZK
/

EXAMES

Sleeping

App Files Community

GABRIELSZK committed on Apr 27

Commit

7fc9d6d

verified ·

1 Parent(s): ab9abcf

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -74

app.py CHANGED Viewed

@@ -1,6 +1,3 @@
-# Instalações necessárias
-!pip install pdfplumber gradio pandas pytesseract --quiet
 import fitz
 import re
 import gradio as gr
@@ -12,14 +9,13 @@ import io
 # Faixas de referência para classificação
 faixas = {
-    "HB": (11.5, 16.5), "HT": (36, 50), "LEUCO": (4000, 11000), "PLT": (150000, 450000),
     "K+": (3.5, 5.5), "NA+": (135, 145), "UREIA": (10, 50), "CR": (0.6, 1.3),
     "TGO": (0, 40), "TGP": (0, 40), "ALB": (3.5, 5.0), "INR": (0.8, 1.2),
     "TAP": (10, 14), "TTP": (25, 35)
 }
 def classificar(nome, valor):
-    """Adiciona setas se valor numérico estiver fora da faixa de referência."""
     try:
         raw = valor.replace(">", "").replace("<", "").strip()
         val = float(raw)
@@ -32,13 +28,11 @@ def classificar(nome, valor):
         return valor
 def melhorar_imagem(img: Image.Image) -> Image.Image:
-    """Aumenta contraste e nitidez para OCR."""
     img = img.convert('L')
     img = ImageEnhance.Contrast(img).enhance(2)
     return img.filter(ImageFilter.SHARPEN)
 def extrair_texto_pdf(pdf_file):
-    """Extrai texto nativo e via OCR de cada página."""
     texto_nativo = []
     ocr_imgs = []
     with fitz.open(pdf_file.name) as doc:
@@ -53,111 +47,54 @@ def extrair_texto_pdf(pdf_file):
     tocr = re.sub(r'\s+', ' ', tocr)
     return tn, tocr
-# Padrões regex (case-insensitive) para todos os exames, incluindo Troponina Qualitativa
 exames = {
-    # Hemograma e diferenciais
-    "LEUCO":   r"leuc[óo]citos.*?([\d.,]+)\s?/u?l",
-    "B":       r"bas[óo]filos.*?([\d.,]+)\s?%",
-    "SS":      r"segmentados.*?([\d.,]+)\s?%",
-    "EOS":     r"eosin[óo]filos.*?([\d.,]+)\s?%",
-    "LINF":    r"linf[oó]citos.*?([\d.,]+)\s?%",
-    "MONO":    r"mon[óo]citos.*?([\d.,]+)\s?%",
-    "HB":      r"hemoglobina.*?([\d.,]+)\s?g/dl",
-    "HT":      r"hemat[óo]crito.*?([\d.,]+)\s?%",
-    "PLT":     r"plaquetas.*?([\d.,]+).?/u?l",
-    # Bioquímica
-    "AMIL":    r"amilase.*?([\d.,]+)\s?u/l",
-    "LIP":     r"lipase.*?([\d.,]+)\s?u/l",
-    "GLI":     r"glicose.*?([\d.,]+)\s?mg/dl",
-    "LACTATO": r"lactato.*?([\d.,]+)\s?mmol/l",
-    "ÁC UR":   r"[áa]cido ur[íi]co.*?([\d.,]+)\s?mg/dl",
-    "BT":      r"bilirrubina total.*?([\d.,]+)\s?mg/dl",
-    "BD":      r"bilirrubina direta.*?([\d.,]+)\s?mg/dl",
-    "BI":      r"bilirrubina indireta.*?([\d.,]+)\s?mg/dl",
-    "CAI":     r"c[áa]lcio ioniza(?:do)?.*?([\d.,]+)\s?mmol/l",
-    "CA TOTAL":r"c[áa]lcio total.*?([\d.,]+)\s?mg/dl",
-    "CL-":     r"cloro.*?([\d.,]+)\s?mmol/l",
-    "MG++":    r"magn[ée]sio.*?([\d.,]+)\s?mg/dl",
-    "FÓS":     r"f[oó]sforo.*?([\d.,]+)\s?mg/dl",
-    "UREIA":   r"ureia.*?([\d.,]+)\s?mg/dl",
-    "CR":      r"creatinina.*?([\d.,]+)\s?mg/dl",
-    # Hepática e proteínas
-    "TGO":     r"tgo.*?([\d.,]+)\s?u/l",
-    "TGP":     r"tgp.*?([\d.,]+)\s?u/l",
-    "GGT":     r"ggt.*?([\d.,]+)\s?u/l",
-    "FAL":     r"fosfatase alcalina.*?([\d.,]+)\s?u/l",
-    "ALB":     r"albumina.*?([\d.,]+)\s?g/dl",
-    "PTN TOTAL":r"prote[ií]na total.*?([\d.,]+)\s?g/dl",
-    "GLOB":    r"globulina.*?([\d.,]+)\s?g/dl",
-    "RELAÇÃO": r"rela[cç][ãa]o\s+a\/g.*?([\d.,]+)",
-    # Coagulação
-    "TAP":     r"tempo de protrombina.*?resultado\s*([\d.,]+)",
-    "INR":     r"inr\s*([\d.,]+)",
-    "TTP":     r"ttpa.*?([\d.,]+)\s?seg",
-    # Inflamatório
-    "PCR":     r"pcr.*?resultado\s*([\d.,]+)",
-    # Cardíacos
-    "CKMB":    r"ck[- ]?mb.*?([\d.,]+)\s?u/l",
-    "CPK":     r"cpk.*?resultado\s*([\d.,]+)",
-    "TROPO":   r"troponina\s*(?!qual).*?([<>]?[\d.,]+)\s?ng/ml",
     "TROPONINA QUAL": r"troponina qualitativa.*?resultado\s*([A-Za-z]+)",
-    # EAS (urina)
     "LEUC ESTERASE": r"leuc[óo]cito esterase.*?([A-Za-z\+\-]+)",
     "LEUCO EAS":     r"leuc[óo]citos?.*?([\d]+\s*[-\/]\s*\d+)",
     "HEMA EAS":      r"hem[áa]cias?.*?([\d]+\s*[-\/]\s*\d+)",
     "BACTERIAS":     r"bact[ée]rias?.*?([A-Za-z]+)"
 }
-# Ordem preferencial de exibição (numéricos e qualitativos)
 ordem = [
-    "LEUCO","B","SS","EOS","LINF","MONO",
-    "HB","HT","PLT","AMIL","LIP","GLI","LACTATO",
-    "ÁC UR","BT","BD","BI","CAI","CA TOTAL","CL-","MG++","FÓS","UREIA","CR",
-    "TGO","TGP","GGT","FAL","ALB","PTN TOTAL","GLOB","RELAÇÃO",
-    "TAP","INR","TTP","PCR","DIMERO D",
-    "CKMB","CPK","TROPO","TROPONINA QUAL"
 ]
 def extrair_exames_formatado(pdf_file):
     if not pdf_file:
         return "Nenhum arquivo enviado.", None
-    # extrai texto
     tn, tocr = extrair_texto_pdf(pdf_file)
-    textos = tn + " " + tocr
     resultados = {}
-    # varre todos os padrões
     for nome, pat in exames.items():
-        m = re.search(pat, textos, re.IGNORECASE)
         if m:
             val = m.group(1).strip().replace(",", ".")
-            # normaliza QUAL como uppercase
             if nome == "TROPONINA QUAL":
                 val = val.upper()
             resultados[nome] = classificar(nome, val)
-    # monta string de EAS
-    eas_chaves = ["LEUC ESTERASE","LEUCO EAS","HEMA EAS","BACTERIAS"]
-    partes_eas = [f"{k}: {resultados[k]}" for k in eas_chaves if k in resultados]
     texto_eas = ""
     if partes_eas:
         texto_eas = "🟤 EAS (Urinálise) → " + " / ".join(partes_eas)
-    # monta string principal
     partes_main = [f"{r}: {resultados[r]}" for r in ordem if r in resultados]
     texto_main = " / ".join(partes_main)
-    # concatena só as partes não vazias
     texto_final = "\n".join([t for t in (texto_eas, texto_main) if t])
-    # gera CSV
     df = pd.DataFrame([[k, resultados[k]] for k in resultados], columns=["Exame","Valor"])
     temp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
     df.to_csv(temp.name, index=False)
     return texto_final, temp.name
-# interface Gradio
 with gr.Blocks() as demo:
     gr.Markdown("## 🧪 Extrator Avançado com OCR + EAS + Troponina Qualitativa")
     pdf_input = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
@@ -168,4 +105,3 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
     demo.launch()

 import fitz
 import re
 import gradio as gr
 # Faixas de referência para classificação
 faixas = {
+    "HB": (12, 17), "HT": (36, 50), "LEUCO": (4, 11), "PLT": (150, 450),
     "K+": (3.5, 5.5), "NA+": (135, 145), "UREIA": (10, 50), "CR": (0.6, 1.3),
     "TGO": (0, 40), "TGP": (0, 40), "ALB": (3.5, 5.0), "INR": (0.8, 1.2),
     "TAP": (10, 14), "TTP": (25, 35)
 }
 def classificar(nome, valor):
     try:
         raw = valor.replace(">", "").replace("<", "").strip()
         val = float(raw)
         return valor
 def melhorar_imagem(img: Image.Image) -> Image.Image:
     img = img.convert('L')
     img = ImageEnhance.Contrast(img).enhance(2)
     return img.filter(ImageFilter.SHARPEN)
 def extrair_texto_pdf(pdf_file):
     texto_nativo = []
     ocr_imgs = []
     with fitz.open(pdf_file.name) as doc:
     tocr = re.sub(r'\s+', ' ', tocr)
     return tn, tocr
 exames = {
+    # ... mesma definição de regex que antes ...
     "TROPONINA QUAL": r"troponina qualitativa.*?resultado\s*([A-Za-z]+)",
+    # EAS
     "LEUC ESTERASE": r"leuc[óo]cito esterase.*?([A-Za-z\+\-]+)",
     "LEUCO EAS":     r"leuc[óo]citos?.*?([\d]+\s*[-\/]\s*\d+)",
     "HEMA EAS":      r"hem[áa]cias?.*?([\d]+\s*[-\/]\s*\d+)",
     "BACTERIAS":     r"bact[ée]rias?.*?([A-Za-z]+)"
 }
 ordem = [
+    # ... mesma ordem que antes, incluindo "TROPONINA QUAL" no fim ...
 ]
 def extrair_exames_formatado(pdf_file):
     if not pdf_file:
         return "Nenhum arquivo enviado.", None
     tn, tocr = extrair_texto_pdf(pdf_file)
+    txt = tn + " " + tocr
     resultados = {}
     for nome, pat in exames.items():
+        m = re.search(pat, txt, re.IGNORECASE)
         if m:
             val = m.group(1).strip().replace(",", ".")
             if nome == "TROPONINA QUAL":
                 val = val.upper()
             resultados[nome] = classificar(nome, val)
+    # Monta saída EAS
+    eas_keys = ["LEUC ESTERASE","LEUCO EAS","HEMA EAS","BACTERIAS"]
+    partes_eas = [f"{k}: {resultados[k]}" for k in eas_keys if k in resultados]
     texto_eas = ""
     if partes_eas:
         texto_eas = "🟤 EAS (Urinálise) → " + " / ".join(partes_eas)
+    # Monta saída principal
     partes_main = [f"{r}: {resultados[r]}" for r in ordem if r in resultados]
     texto_main = " / ".join(partes_main)
     texto_final = "\n".join([t for t in (texto_eas, texto_main) if t])
     df = pd.DataFrame([[k, resultados[k]] for k in resultados], columns=["Exame","Valor"])
     temp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
     df.to_csv(temp.name, index=False)
     return texto_final, temp.name
 with gr.Blocks() as demo:
     gr.Markdown("## 🧪 Extrator Avançado com OCR + EAS + Troponina Qualitativa")
     pdf_input = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
 if __name__ == "__main__":
     demo.launch()