mohammad2928git committed on
Commit
f142a59
·
verified ·
1 Parent(s): d287b63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -17
app.py CHANGED
@@ -26,17 +26,6 @@ import os
26
  # Function to perform OCR
27
  def ocr(input_file, lang='fas'): # 'fas': Persian language (Farsi)
28
  extracted_text = ""
29
-
30
- # Check if the input file is a PDF or an image
31
- if isinstance(input_file, str) and input_file.endswith('.pdf'): # Check if the file is a PDF
32
- # Convert PDF to images
33
- images = convert_from_path(input_file)
34
-
35
- # Loop through each image and perform OCR
36
- for page_number, image in enumerate(images):
37
- text = pytesseract.image_to_string(image, lang=lang)
38
- extracted_text += text
39
-
40
  elif isinstance(input_file, Image.Image): # If the input is an image
41
  text = pytesseract.image_to_string(input_file, lang=lang)
42
  extracted_text = text
@@ -94,12 +83,10 @@ Generate OCR
94
  """
95
 
96
  def process(input_type, file, lang):
97
- if input_type == "PDF":
98
- extracted_text = ocr(file.name, lang)
99
- else:
100
- # image = Image.open(file.name)
101
- image = file
102
- extracted_text = ocr(image, lang)
103
  return extracted_text
104
 
105
 
 
26
  # Function to perform OCR
27
  def ocr(input_file, lang='fas'): # 'fas': Persian language (Farsi)
28
  extracted_text = ""
 
 
 
 
 
 
 
 
 
 
 
29
  elif isinstance(input_file, Image.Image): # If the input is an image
30
  text = pytesseract.image_to_string(input_file, lang=lang)
31
  extracted_text = text
 
83
  """
84
 
85
  def process(input_type, file, lang):
86
+
87
+ # image = Image.open(file.name)
88
+ image = file
89
+ extracted_text = ocr(image, lang)
 
 
90
  return extracted_text
91
 
92