GABRIELSZK committed on
Commit
96fc805
verified
1 Parent(s): c4ebbf4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -53,13 +53,13 @@ def extrair_texto_pdf(pdf_file):
53
  texto_ocr = re.sub(r'\s+', ' ', texto_ocr)
54
  return texto_fitz, texto_ocr
55
 
56
- # Padrões regex otimizados
57
  exames = {
58
  "LEUCO": r"leuc[óo]citos.*?([\d.,]+)\s?(?:10\^3)?/u?l",
59
  "B": r"bas[óo]filos.*?([\d.,]+)\s?%",
60
  "SS": r"segmentados.*?([\d.,]+)\s?%",
61
  "EOS": r"eosin[óo]filos.*?([\d.,]+)\s?%",
62
- "LINF": r"linfocitos.*?([\d.,]+)\s?%",
63
  "MONO": r"mon[óo]citos.*?([\d.,]+)\s?%",
64
  "HB": r"hemoglobina.*?([\d.,]+)\s?g/dl",
65
  "HT": r"hemat[óo]crito.*?([\d.,]+)\s?%",
@@ -90,7 +90,7 @@ exames = {
90
  "TGO": r"tgo.*?([\d.,]+)\s?u/l",
91
  "TGP": r"tgp.*?([\d.,]+)\s?u/l",
92
  "TAP": r"tempo de protrombina.*?\bresultado\b\s*([\d]+,[\d]+)",
93
- "INR": r"inr.*?([\d.,]+)",
94
  "TTP": r"ttpa.*?([\d.,]+)\s?seg",
95
  "LAC": r"lactato.*?([\d.,]+)\s?mmol/l",
96
  "CKMB": r"ck[- ]?mb.*?([\d.,]+)\s?u/l",
@@ -132,7 +132,7 @@ def extrair_exames_formatado(pdf_file):
132
 
133
  # Interface Gradio
134
  with gr.Blocks() as demo:
135
- gr.Markdown("## 🧪 Extrator Avançado com OCR - Regex Otimizado")
136
  pdf_file = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
137
  btn = gr.Button("🔍 Extrair Exames")
138
  out_txt = gr.Textbox(label="📋 Exames Classificados", lines=10)
@@ -140,5 +140,4 @@ with gr.Blocks() as demo:
140
  btn.click(extrair_exames_formatado, inputs=pdf_file, outputs=[out_txt, dl])
141
 
142
  if __name__ == '__main__':
143
- demo.launch()
144
-
 
53
  texto_ocr = re.sub(r'\s+', ' ', texto_ocr)
54
  return texto_fitz, texto_ocr
55
 
56
+ # Padrões regex corrigidos para LINF e INR
57
  exames = {
58
  "LEUCO": r"leuc[óo]citos.*?([\d.,]+)\s?(?:10\^3)?/u?l",
59
  "B": r"bas[óo]filos.*?([\d.,]+)\s?%",
60
  "SS": r"segmentados.*?([\d.,]+)\s?%",
61
  "EOS": r"eosin[óo]filos.*?([\d.,]+)\s?%",
62
+ "LINF": r"linf[oó]citos.*?([\d.,]+)\s?%",
63
  "MONO": r"mon[óo]citos.*?([\d.,]+)\s?%",
64
  "HB": r"hemoglobina.*?([\d.,]+)\s?g/dl",
65
  "HT": r"hemat[óo]crito.*?([\d.,]+)\s?%",
 
90
  "TGO": r"tgo.*?([\d.,]+)\s?u/l",
91
  "TGP": r"tgp.*?([\d.,]+)\s?u/l",
92
  "TAP": r"tempo de protrombina.*?\bresultado\b\s*([\d]+,[\d]+)",
93
+ "INR": r"I\s*N\s*R\s+([\d]+,[\d]+)",
94
  "TTP": r"ttpa.*?([\d.,]+)\s?seg",
95
  "LAC": r"lactato.*?([\d.,]+)\s?mmol/l",
96
  "CKMB": r"ck[- ]?mb.*?([\d.,]+)\s?u/l",
 
132
 
133
  # Interface Gradio
134
  with gr.Blocks() as demo:
135
+ gr.Markdown("## 🧪 Extrator Avançado com OCR - Regex Final")
136
  pdf_file = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
137
  btn = gr.Button("🔍 Extrair Exames")
138
  out_txt = gr.Textbox(label="📋 Exames Classificados", lines=10)
 
140
  btn.click(extrair_exames_formatado, inputs=pdf_file, outputs=[out_txt, dl])
141
 
142
  if __name__ == '__main__':
143
+ demo.launch()