Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -53,13 +53,13 @@ def extrair_texto_pdf(pdf_file):
|
|
53 |
texto_ocr = re.sub(r'\s+', ' ', texto_ocr)
|
54 |
return texto_fitz, texto_ocr
|
55 |
|
56 |
-
# Padrões regex
|
57 |
exames = {
|
58 |
"LEUCO": r"leuc[óo]citos.*?([\d.,]+)\s?(?:10\^3)?/u?l",
|
59 |
"B": r"bas[óo]filos.*?([\d.,]+)\s?%",
|
60 |
"SS": r"segmentados.*?([\d.,]+)\s?%",
|
61 |
"EOS": r"eosin[óo]filos.*?([\d.,]+)\s?%",
|
62 |
-
"LINF": r"
|
63 |
"MONO": r"mon[óo]citos.*?([\d.,]+)\s?%",
|
64 |
"HB": r"hemoglobina.*?([\d.,]+)\s?g/dl",
|
65 |
"HT": r"hemat[óo]crito.*?([\d.,]+)\s?%",
|
@@ -90,7 +90,7 @@ exames = {
|
|
90 |
"TGO": r"tgo.*?([\d.,]+)\s?u/l",
|
91 |
"TGP": r"tgp.*?([\d.,]+)\s?u/l",
|
92 |
"TAP": r"tempo de protrombina.*?\bresultado\b\s*([\d]+,[\d]+)",
|
93 |
-
"INR": r"
|
94 |
"TTP": r"ttpa.*?([\d.,]+)\s?seg",
|
95 |
"LAC": r"lactato.*?([\d.,]+)\s?mmol/l",
|
96 |
"CKMB": r"ck[- ]?mb.*?([\d.,]+)\s?u/l",
|
@@ -132,7 +132,7 @@ def extrair_exames_formatado(pdf_file):
|
|
132 |
|
133 |
# Interface Gradio
|
134 |
with gr.Blocks() as demo:
|
135 |
-
gr.Markdown("## 🧪 Extrator Avançado com OCR - Regex
|
136 |
pdf_file = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
|
137 |
btn = gr.Button("🔍 Extrair Exames")
|
138 |
out_txt = gr.Textbox(label="📋 Exames Classificados", lines=10)
|
@@ -140,5 +140,4 @@ with gr.Blocks() as demo:
|
|
140 |
btn.click(extrair_exames_formatado, inputs=pdf_file, outputs=[out_txt, dl])
|
141 |
|
142 |
if __name__ == '__main__':
|
143 |
-
demo.launch()
|
144 |
-
|
|
|
53 |
texto_ocr = re.sub(r'\s+', ' ', texto_ocr)
|
54 |
return texto_fitz, texto_ocr
|
55 |
|
56 |
+
# Padrões regex corrigidos para LINF e INR
|
57 |
exames = {
|
58 |
"LEUCO": r"leuc[óo]citos.*?([\d.,]+)\s?(?:10\^3)?/u?l",
|
59 |
"B": r"bas[óo]filos.*?([\d.,]+)\s?%",
|
60 |
"SS": r"segmentados.*?([\d.,]+)\s?%",
|
61 |
"EOS": r"eosin[óo]filos.*?([\d.,]+)\s?%",
|
62 |
+
"LINF": r"linf[oó]citos.*?([\d.,]+)\s?%",
|
63 |
"MONO": r"mon[óo]citos.*?([\d.,]+)\s?%",
|
64 |
"HB": r"hemoglobina.*?([\d.,]+)\s?g/dl",
|
65 |
"HT": r"hemat[óo]crito.*?([\d.,]+)\s?%",
|
|
|
90 |
"TGO": r"tgo.*?([\d.,]+)\s?u/l",
|
91 |
"TGP": r"tgp.*?([\d.,]+)\s?u/l",
|
92 |
"TAP": r"tempo de protrombina.*?\bresultado\b\s*([\d]+,[\d]+)",
|
93 |
+
"INR": r"I\s*N\s*R\s+([\d]+,[\d]+)",
|
94 |
"TTP": r"ttpa.*?([\d.,]+)\s?seg",
|
95 |
"LAC": r"lactato.*?([\d.,]+)\s?mmol/l",
|
96 |
"CKMB": r"ck[- ]?mb.*?([\d.,]+)\s?u/l",
|
|
|
132 |
|
133 |
# Interface Gradio
|
134 |
with gr.Blocks() as demo:
|
135 |
+
gr.Markdown("## 🧪 Extrator Avançado com OCR - Regex Final")
|
136 |
pdf_file = gr.File(label="📄 PDF de exames", file_types=[".pdf"])
|
137 |
btn = gr.Button("🔍 Extrair Exames")
|
138 |
out_txt = gr.Textbox(label="📋 Exames Classificados", lines=10)
|
|
|
140 |
btn.click(extrair_exames_formatado, inputs=pdf_file, outputs=[out_txt, dl])
|
141 |
|
142 |
if __name__ == '__main__':
|
143 |
+
demo.launch()
|
|