Spaces:

mohammad2928git
/

OCR_V1

Running

mohammad2928git committed 28 days ago

Commit

f142a59

verified ·

1 Parent(s): d287b63

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,17 +26,6 @@ import os
 # Function to perform OCR
 def ocr(input_file, lang='fas'):  # 'fas': Persian language (Farsi)
     extracted_text = ""
-    # Check if the input file is a PDF or an image
-    if isinstance(input_file, str) and input_file.endswith('.pdf'):  # Check if the file is a PDF
-        # Convert PDF to images
-        images = convert_from_path(input_file)
-        # Loop through each image and perform OCR
-        for page_number, image in enumerate(images):
-            text = pytesseract.image_to_string(image, lang=lang)
-            extracted_text += text
     elif isinstance(input_file, Image.Image):  # If the input is an image
         text = pytesseract.image_to_string(input_file, lang=lang)
         extracted_text = text
@@ -94,12 +83,10 @@ Generate OCR
 """
 def process(input_type, file, lang):
-    if input_type == "PDF":
-        extracted_text = ocr(file.name, lang)
-    else:
-        # image = Image.open(file.name)
-        image = file
-        extracted_text = ocr(image, lang)
     return extracted_text

 # Function to perform OCR
 def ocr(input_file, lang='fas'):  # 'fas': Persian language (Farsi)
     extracted_text = ""
     elif isinstance(input_file, Image.Image):  # If the input is an image
         text = pytesseract.image_to_string(input_file, lang=lang)
         extracted_text = text
 """
 def process(input_type, file, lang):
+    # image = Image.open(file.name)
+    image = file
+    extracted_text = ocr(image, lang)
     return extracted_text