Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -26,17 +26,6 @@ import os
|
|
26 |
# Function to perform OCR
|
27 |
def ocr(input_file, lang='fas'): # 'fas': Persian language (Farsi)
|
28 |
extracted_text = ""
|
29 |
-
|
30 |
-
# Check if the input file is a PDF or an image
|
31 |
-
if isinstance(input_file, str) and input_file.endswith('.pdf'): # Check if the file is a PDF
|
32 |
-
# Convert PDF to images
|
33 |
-
images = convert_from_path(input_file)
|
34 |
-
|
35 |
-
# Loop through each image and perform OCR
|
36 |
-
for page_number, image in enumerate(images):
|
37 |
-
text = pytesseract.image_to_string(image, lang=lang)
|
38 |
-
extracted_text += text
|
39 |
-
|
40 |
elif isinstance(input_file, Image.Image): # If the input is an image
|
41 |
text = pytesseract.image_to_string(input_file, lang=lang)
|
42 |
extracted_text = text
|
@@ -94,12 +83,10 @@ Generate OCR
|
|
94 |
"""
|
95 |
|
96 |
def process(input_type, file, lang):
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
image = file
|
102 |
-
extracted_text = ocr(image, lang)
|
103 |
return extracted_text
|
104 |
|
105 |
|
|
|
26 |
# Function to perform OCR
|
27 |
def ocr(input_file, lang='fas'): # 'fas': Persian language (Farsi)
|
28 |
extracted_text = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
elif isinstance(input_file, Image.Image): # If the input is an image
|
30 |
text = pytesseract.image_to_string(input_file, lang=lang)
|
31 |
extracted_text = text
|
|
|
83 |
"""
|
84 |
|
85 |
def process(input_type, file, lang):
|
86 |
+
|
87 |
+
# image = Image.open(file.name)
|
88 |
+
image = file
|
89 |
+
extracted_text = ocr(image, lang)
|
|
|
|
|
90 |
return extracted_text
|
91 |
|
92 |
|