Spaces:
Running
on
Zero
Running
on
Zero
zamalali
committed on
Commit
·
eb3e47a
1
Parent(s):
5a8ab80
Add pymupdf import to utils.py for enhanced PDF processing
Browse files
utils.py
CHANGED
@@ -4,6 +4,8 @@ import io
|
|
4 |
import gradio as gr
|
5 |
import base64
|
6 |
import pandas as pd
|
|
|
|
|
7 |
|
8 |
def image_to_bytes(image):
|
9 |
img_byte_arr = io.BytesIO()
|
@@ -48,4 +50,4 @@ def clean_text(text):
|
|
48 |
cleaned_text = cleaned_text.replace("\t", " ")
|
49 |
cleaned_text = cleaned_text.replace(" ", " ")
|
50 |
cleaned_text = cleaned_text.strip()
|
51 |
-
return cleaned_text
|
|
|
4 |
import gradio as gr
|
5 |
import base64
|
6 |
import pandas as pd
|
7 |
+
import pymupdf
|
8 |
+
|
9 |
|
10 |
def image_to_bytes(image):
|
11 |
img_byte_arr = io.BytesIO()
|
|
|
50 |
cleaned_text = cleaned_text.replace("\t", " ")
|
51 |
cleaned_text = cleaned_text.replace(" ", " ")
|
52 |
cleaned_text = cleaned_text.strip()
|
53 |
+
return cleaned_text
|