Spaces:

GABRIELSZK
/

EXAMES

Sleeping

GABRIELSZK committed on Apr 25

Commit

96fc805

verified ·

1 Parent(s): c4ebbf4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -53,13 +53,13 @@ def extrair_texto_pdf(pdf_file):
     texto_ocr = re.sub(r'\s+', ' ', texto_ocr)
     return texto_fitz, texto_ocr
-# Padrões regex otimizados
 exames = {
     "LEUCO":   r"leuc[óo]citos.*?([\d.,]+)\s?(?:10\^3)?/u?l",
     "B":       r"bas[óo]filos.*?([\d.,]+)\s?%",
     "SS":      r"segmentados.*?([\d.,]+)\s?%",
     "EOS":     r"eosin[óo]filos.*?([\d.,]+)\s?%",
-    "LINF":    r"linfocitos.*?([\d.,]+)\s?%",
     "MONO":    r"mon[óo]citos.*?([\d.,]+)\s?%",
     "HB":      r"hemoglobina.*?([\d.,]+)\s?g/dl",
     "HT":      r"hemat[óo]crito.*?([\d.,]+)\s?%",
@@ -90,7 +90,7 @@ exames = {
     "TGO":     r"tgo.*?([\d.,]+)\s?u/l",
     "TGP":     r"tgp.*?([\d.,]+)\s?u/l",
     "TAP":     r"tempo de protrombina.*?\bresultado\b\s*([\d]+,[\d]+)",
-    "INR":     r"inr.*?([\d.,]+)",
     "TTP":     r"ttpa.*?([\d.,]+)\s?seg",
     "LAC":     r"lactato.*?([\d.,]+)\s?mmol/l",
     "CKMB":    r"ck[- ]?mb.*?([\d.,]+)\s?u/l",
@@ -132,7 +132,7 @@ def extrair_exames_formatado(pdf_file):
 # Interface Gradio
 with gr.Blocks() as demo:
-    gr.Markdown("## 🧪 Extrator Avançado com OCR - Regex Otimizado")
     pdf_file = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
     btn = gr.Button("🔍 Extrair Exames")
     out_txt = gr.Textbox(label="📋 Exames Classificados", lines=10)
@@ -140,5 +140,4 @@ with gr.Blocks() as demo:
     btn.click(extrair_exames_formatado, inputs=pdf_file, outputs=[out_txt, dl])
 if __name__ == '__main__':
-    demo.launch()

     texto_ocr = re.sub(r'\s+', ' ', texto_ocr)
     return texto_fitz, texto_ocr
+# Padrões regex corrigidos para LINF e INR
 exames = {
     "LEUCO":   r"leuc[óo]citos.*?([\d.,]+)\s?(?:10\^3)?/u?l",
     "B":       r"bas[óo]filos.*?([\d.,]+)\s?%",
     "SS":      r"segmentados.*?([\d.,]+)\s?%",
     "EOS":     r"eosin[óo]filos.*?([\d.,]+)\s?%",
+    "LINF":    r"linf[oó]citos.*?([\d.,]+)\s?%",
     "MONO":    r"mon[óo]citos.*?([\d.,]+)\s?%",
     "HB":      r"hemoglobina.*?([\d.,]+)\s?g/dl",
     "HT":      r"hemat[óo]crito.*?([\d.,]+)\s?%",
     "TGO":     r"tgo.*?([\d.,]+)\s?u/l",
     "TGP":     r"tgp.*?([\d.,]+)\s?u/l",
     "TAP":     r"tempo de protrombina.*?\bresultado\b\s*([\d]+,[\d]+)",
+    "INR":     r"I\s*N\s*R\s+([\d]+,[\d]+)",
     "TTP":     r"ttpa.*?([\d.,]+)\s?seg",
     "LAC":     r"lactato.*?([\d.,]+)\s?mmol/l",
     "CKMB":    r"ck[- ]?mb.*?([\d.,]+)\s?u/l",
 # Interface Gradio
 with gr.Blocks() as demo:
+    gr.Markdown("## 🧪 Extrator Avançado com OCR - Regex Final")
     pdf_file = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
     btn = gr.Button("🔍 Extrair Exames")
     out_txt = gr.Textbox(label="📋 Exames Classificados", lines=10)
     btn.click(extrair_exames_formatado, inputs=pdf_file, outputs=[out_txt, dl])
 if __name__ == '__main__':
+    demo.launch()