DeepDiveDev committed
Commit 913b0ce · verified · 1 Parent(s): 16673d3

Update app.py

Files changed (1):
  1. app.py +108 -92

app.py CHANGED
@@ -1,107 +1,123 @@
- import gradio as gr
- import easyocr
  from PIL import Image
- import pdf2image
- import tempfile
- import os
- import cv2
- import numpy as np
  import torch

- # Initialize the OCR reader (this will download models on first run)
- reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())
-
- def preprocess_image(img):
-     """Preprocess image to improve OCR accuracy for handwritten text"""
-     # Convert PIL Image to numpy array
-     img_array = np.array(img)
-
-     # Check if the image is already grayscale
-     if len(img_array.shape) == 2:
-         gray = img_array
-     else:
-         gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
-
-     # Apply adaptive thresholding for better handling of different lighting conditions
-     binary = cv2.adaptiveThreshold(
-         gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
-     )
-
-     # Noise removal
-     kernel = np.ones((1, 1), np.uint8)
-     binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
-     binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
-
-     return binary
-
- def extract_text_from_image(img):
-     """Extract text from an image using EasyOCR"""
-     # Preprocess for better handwriting recognition
-     processed_img = preprocess_image(img)
-
-     # Use EasyOCR to extract text
-     results = reader.readtext(processed_img)
-
-     # Combine all detected text
-     text = '\n'.join([result[1] for result in results])
-
-     return text.strip()
-
- def extract_text_from_pdf(pdf_path):
-     """Extract text from all pages of a PDF file"""
-     # Convert PDF to images
-     with tempfile.TemporaryDirectory() as path:
-         images = pdf2image.convert_from_path(pdf_path, output_folder=path)
-
-         # Extract text from each page
-         full_text = []
-         for img in images:
-             text = extract_text_from_image(img)
-             full_text.append(text)
-
-     return "\n\n--- Page Break ---\n\n".join(full_text)
-
- def process_file(file):
-     """Process the uploaded file (PDF or image)"""
-     if file is None:
-         return "No file uploaded. Please upload an image or PDF file."
-
-     try:
-         file_extension = os.path.splitext(file.name)[1].lower()
-
-         if file_extension == ".pdf":
-             # Process PDF
-             return extract_text_from_pdf(file.name)
-         elif file_extension in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]:
-             # Process Image
-             img = Image.open(file.name)
-             return extract_text_from_image(img)
-         else:
-             return "Unsupported file format. Please upload a PDF or image file (JPG, PNG, BMP, TIFF)."
-     except Exception as e:
-         return f"Error processing file: {str(e)}"
-
- # Create Gradio interface
- with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
-     gr.Markdown("# Handwritten Text OCR Extraction Tool")
-     gr.Markdown("Upload an image or PDF containing handwritten text to extract the content.")
-
      with gr.Row():
-         with gr.Column():
-             file_input = gr.File(label="Upload Image or PDF")
-             extract_button = gr.Button("Extract Text")
-
-         with gr.Column():
-             text_output = gr.Textbox(label="Extracted Text", lines=10, placeholder="Extracted text will appear here...")
-
-     extract_button.click(fn=process_file, inputs=[file_input], outputs=[text_output])
-
-     gr.Markdown("### Notes:")
-     gr.Markdown("- For best results, ensure the handwriting is clear and the image is well-lit")
-     gr.Markdown("- The system works best with dark text on light background")
-     gr.Markdown("- The first run may take longer as it downloads the OCR models")
-     gr.Markdown("- Multiple page PDFs will show page breaks in the output")
-
- # Launch the app
- if __name__ == "__main__":
-     app.launch()
+ import PIL
  from PIL import Image
+ from PIL import ImageDraw
+ import gradio as gr
  import torch
+ import easyocr
+ import re
+
+ # Download example images
+ torch.hub.download_url_to_file('https://github.com/JaidedAI/EasyOCR/raw/master/examples/english.png', 'english.png')
+ torch.hub.download_url_to_file('https://i.imgur.com/mwQFd7G.jpeg', 'Hindi.jpeg')
+ torch.hub.download_url_to_file('https://github.com/JaidedAI/EasyOCR/raw/master/examples/thai.jpg', 'thai.jpg')
+ torch.hub.download_url_to_file('https://github.com/JaidedAI/EasyOCR/raw/master/examples/french.jpg', 'french.jpg')
+ torch.hub.download_url_to_file('https://github.com/JaidedAI/EasyOCR/raw/master/examples/chinese.jpg', 'chinese.jpg')
+ torch.hub.download_url_to_file('https://github.com/JaidedAI/EasyOCR/raw/master/examples/japanese.jpg', 'japanese.jpg')
+ torch.hub.download_url_to_file('https://github.com/JaidedAI/EasyOCR/raw/master/examples/korean.png', 'korean.png')
+
+ def draw_boxes(image, bounds, color='yellow', width=2):
+     draw = ImageDraw.Draw(image)
+     for bound in bounds:
+         p0, p1, p2, p3 = bound[0]
+         draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
+     return image
+
+ def format_extracted_text(bounds):
+     return " ".join([text for _, text, _ in bounds])
+
+ def highlight_search_results(text, search_query):
+     if not search_query:
+         return text, []
+     pattern = re.compile(re.escape(search_query), re.IGNORECASE)
+     matches = list(pattern.finditer(text))
+     highlighted_text = pattern.sub(lambda m: f"**{m.group()}**", text)
+     return highlighted_text, matches
+
+ def inference(img, lang):
+     reader = easyocr.Reader(lang)
+     bounds = reader.readtext(img)
+     im = PIL.Image.open(img)
+     draw_boxes(im, bounds)
+     im.save('result.jpg')
+
+     extracted_text = format_extracted_text(bounds)
+
+     return ['result.jpg', extracted_text]
+
+ def search_text(text, search_query):
+     highlighted_text, matches = highlight_search_results(text, search_query)
+
+     if matches:
+         result = f"Found {len(matches)} occurrence(s) of \"{search_query}\":\n"
+         for i, match in enumerate(matches, 1):
+             context_start = max(0, match.start() - 20)
+             context_end = min(len(text), match.end() + 20)
+             context = text[context_start:context_end]
+             result += f"{i}. ...{context}...\n"
+     else:
+         result = f"No occurrences of \"{search_query}\" found."
+
+     return highlighted_text, result
+
+ title = 'Image To Text OCR Converter'
+ subtitle = 'Extract Hindi, English, or text in any other supported language from an image'
+ description = 'This application was built for the IIT R internship assignment. It allows users to upload a single image, extracts the text using OCR, and provides a basic search feature.'
+
+ note = 'Please be patient while the OCR runs; it may take a few seconds to complete.'
+
+ alternative_link = "[Alternative: Ready-to-use OCR using Vercel](https://iitr-haq-nawaz-maliks-projects.vercel.app/)"
+
+ examples = [
+     ['english.png', ['en']],
+     ['Hindi.jpeg', ['hi', 'en']],
+     ['thai.jpg', ['th', 'en']],
+     ['french.jpg', ['fr', 'en']],
+     ['chinese.jpg', ['ch_sim', 'en']],
+     ['japanese.jpg', ['ja', 'en']],
+     ['korean.png', ['ko', 'en']]
+ ]
+
+ css = """
+ .output_image, .input_image {height: 40rem !important; width: 100% !important;}
+ .search_results {margin-top: 1rem; padding: 1rem; background-color: #f0f0f0; border-radius: 4px;}
+ .centered-title {text-align: center; font-size: 2.5em; font-weight: bold; margin-bottom: 0.5em;}
+ .centered-subtitle {text-align: center; font-size: 1.5em; margin-bottom: 1em;}
+ .alternative-link {text-align: center; margin-top: 1em; font-style: italic;}
+ """
+
+ choices = [
+     "abq", "ady", "af", "ang", "ar", "as", "ava", "az", "be", "bg", "bh", "bho", "bn", "bs", "ch_sim", "ch_tra",
+     "che", "cs", "cy", "da", "dar", "de", "en", "es", "et", "fa", "fr", "ga", "gom", "hi", "hr", "hu", "id",
+     "inh", "is", "it", "ja", "kbd", "kn", "ko", "ku", "la", "lbe", "lez", "lt", "lv", "mah", "mai", "mi", "mn",
+     "mr", "ms", "mt", "ne", "new", "nl", "no", "oc", "pi", "pl", "pt", "ro", "ru", "rs_cyrillic", "rs_latin",
+     "sck", "sk", "sl", "sq", "sv", "sw", "ta", "tab", "te", "th", "tjk", "tl", "tr", "ug", "uk", "ur", "uz", "vi"
+ ]
+
+ with gr.Blocks(css=css) as iface:
+     gr.Markdown(f"# {title}")
+     gr.Markdown(f"## {subtitle}")
+     gr.Markdown(description)
+     gr.Markdown(note)
+     gr.Markdown(alternative_link)
      with gr.Row():
+         with gr.Column(scale=2):
+             input_image = gr.Image(type="filepath", label="Upload Image")
+             lang_select = gr.CheckboxGroup(choices=choices, label="Select Languages", value=['hi', 'en'])
+             ocr_button = gr.Button("Perform OCR")
+
+         with gr.Column(scale=3):
+             output_image = gr.Image(type="filepath", label="OCR Result")
+             extracted_text = gr.Markdown(label="Extracted Text")
+             search_box = gr.Textbox(label="Search in extracted text")
+             search_button = gr.Button("Search")
+             search_results = gr.Markdown(label="Search Results")
+
+     ocr_button.click(inference, inputs=[input_image, lang_select], outputs=[output_image, extracted_text])
+     search_button.click(search_text, inputs=[extracted_text, search_box], outputs=[extracted_text, search_results])
+
+     gr.Examples(examples, inputs=[input_image, lang_select])
+
+ iface.launch()
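
One caveat in the new code: inference constructs a fresh easyocr.Reader on every button click, so the detection and recognition models are reloaded for each request. A minimal sketch of one common mitigation, caching one reader per language selection, is shown below; the get_reader helper and its lru_cache keying are illustrative assumptions, not code from this commit.

import functools

import easyocr

@functools.lru_cache(maxsize=8)
def get_reader(langs):
    # lru_cache needs hashable arguments, so the language selection
    # is passed as a tuple; each distinct selection builds one Reader.
    return easyocr.Reader(list(langs))

def inference(img, lang):
    # Reuse a cached Reader instead of reloading models on every click.
    reader = get_reader(tuple(lang))
    return reader.readtext(img)

With the default ['hi', 'en'] selection, the first click pays the model-load cost once and later clicks reuse the same reader; the tuple conversion is needed only because lists are unhashable.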