Kawthar12h committed
Commit 43c7b9d
Parent(s): 67779ca
Create app.py
app.py
ADDED
@@ -0,0 +1,130 @@
# Import needed libraries
from PIL import Image
import gradio as gr
import torch
import requests
import re
from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration, TrOCRProcessor, VisionEncoderDecoderModel

# Download example images for the captioning tab
img_urls_1 = ['https://i.pinimg.com/564x/f7/f5/bd/f7f5bd929e05a852ff423e6e02deea54.jpg', 'https://i.pinimg.com/564x/b4/29/69/b4296962cb76a72354a718109835caa3.jpg',
              'https://i.pinimg.com/564x/f2/68/8e/f2688eccd6dd60fdad89ef78950b9ead.jpg']
for idx1, url1 in enumerate(img_urls_1):
    image = Image.open(requests.get(url1, stream=True).raw)
    image.save(f"image_{idx1}.png")

# Download example images for the text-recognition tabs
img_urls_2 = ['https://i.pinimg.com/564x/14/b0/07/14b0075ccd5ea35f7deffc9e5bd6de30.jpg', 'https://newsimg.bbc.co.uk/media/images/45510000/jpg/_45510184_the_writings_466_180.jpg',
              'https://cdn.shopify.com/s/files/1/0047/1524/9737/files/Cetaphil_Face_Wash_Ingredients_Optimized.png?v=1680923920', 'https://github.com/kawther12h/Image_Captioning-and-Text_Recognition/blob/main/handText22.jpg?raw=true', 'https://github.com/kawther12h/Image_Captioning-and-Text_Recognition/blob/main/handText11.jpg?raw=true']
for idx2, url2 in enumerate(img_urls_2):
    image = Image.open(requests.get(url2, stream=True).raw)
    image.save(f"tx_image_{idx2}.png")
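
# The quick loops above will crash on a dead or slow link. A more defensive
# variant could look like this sketch (fetch_image, the timeout, and
# raise_for_status() are illustrative additions, not part of the original flow):
def fetch_image(url, path, timeout=30):
    # Stream the response, fail loudly on HTTP errors, and normalize to RGB
    response = requests.get(url, stream=True, timeout=timeout)
    response.raise_for_status()
    img = Image.open(response.raw).convert("RGB")
    img.save(path)
    return img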

# Load BLIP processor and model for image captioning
processor_blip = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model_blip = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

# Load the Marefa MarianMT model for English-to-Arabic translation
translate = pipeline("translation", model="marefa-nlp/marefa-mt-en-ar")
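# For reference, a translation pipeline returns a list of dicts
# (illustrative shape, not a recorded run):
#   translate("a cat on a sofa")  # -> [{'translation_text': '...'}]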

def caption_and_translate(img, min_len, max_len):
    # Generate an English caption with BLIP
    raw_image = Image.open(img).convert('RGB')
    inputs_blip = processor_blip(raw_image, return_tensors="pt")
    out_blip = model_blip.generate(**inputs_blip, min_length=min_len, max_length=max_len)
    english_caption = processor_blip.decode(out_blip[0], skip_special_tokens=True)

    # Translate the caption from English to Arabic
    arabic_caption = translate(english_caption)
    arabic_caption = arabic_caption[0]['translation_text']

    # Wrap the Arabic text in a right-to-left container so it renders correctly
    translated_caption = f'<div dir="rtl">{arabic_caption}</div>'

    # Return both captions
    return english_caption, translated_caption
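
# Hypothetical local check of the function above (not executed at startup):
#   en, ar = caption_and_translate("image_0.png", min_len=30, max_len=100)
#   print(en)  # one English sentence
#   print(ar)  # the same caption wrapped in an rtl <div>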

# Gradio interface with multiple outputs
img_cap_en_ar = gr.Interface(
    fn=caption_and_translate,
    inputs=[gr.Image(type='filepath', label='Image'),
            gr.Slider(label='Minimum Length', minimum=1, maximum=500, value=30),
            gr.Slider(label='Maximum Length', minimum=1, maximum=500, value=100)],
    outputs=[gr.Textbox(label='English Caption'),
             gr.HTML(label='Arabic Caption')],
    title='Image Captioning | وصف الصورة',
    description="Upload an image to generate an English & Arabic caption | قم برفع صورة وأرسلها ليظهر لك وصف للصورة",
    examples=[["image_0.png"], ["image_1.png"], ["image_2.png"]]
)

# Load the Donut model for printed/digital text recognition
text_rec = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")
# The English-to-Arabic translation pipeline loaded above is reused here

# Process the image and extract its printed text
def extract_text(image):
    # Pass the image to the OCR pipeline
    result = text_rec(image)

    # Take the raw output and strip the model's markup tags
    text = result[0]['generated_text']
    text = re.sub(r'<[^>]*>', '', text)  # Remove all <...> tags

    # Translate the extracted text from English to Arabic
    arabic_text3 = translate(text)
    arabic_text3 = arabic_text3[0]['translation_text']
    htranslated_text = f'<div dir="rtl">{arabic_text3}</div>'

    # Return the extracted and translated text
    return text, htranslated_text
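
# Donut decodes a document into an XML-like tag sequence (field tags such as
# <s_nm>...</s_nm> in CORD-style output); the regex above strips those tags so
# only plain text remains. The exact tag names depend on the fine-tuning data.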

# Define the Gradio interface
text_recognition = gr.Interface(
    fn=extract_text,  # The function that processes the image
    inputs=gr.Image(type="pil"),  # Input is an image (PIL format)
    outputs=[gr.Textbox(label='Extracted text'), gr.HTML(label='Translated extracted text')],
    title="Text Extraction and Translation | استخراج النص وترجمته",
    description="Upload an image, then press Submit to extract its text and translate it to Arabic | قم برفع الصورة وأرسلها ليظهر لك النص من الصورة",
    examples=[["tx_image_0.png"], ["tx_image_2.png"]],
)

# Load the TrOCR model for handwritten text extraction
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
# The English-to-Arabic translation pipeline loaded above is reused here as well

def recognize_handwritten_text(image2):
    # Preprocess the image and extract text with TrOCR
    pixel_values = processor(images=image2, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Translate the extracted text from English to Arabic
    arabic_text2 = translate(generated_text)
    arabic_text2 = arabic_text2[0]['translation_text']
    htranslated_text = f'<div dir="rtl">{arabic_text2}</div>'

    # Return the extracted and translated text
    return generated_text, htranslated_text
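
# gr.Image with no type= passes the function a NumPy array, which
# TrOCRProcessor accepts directly. A hypothetical offline check:
#   text, html = recognize_handwritten_text(Image.open("tx_image_1.png").convert("RGB"))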

# Gradio interface with image upload input and text output
handwritten_rec = gr.Interface(
    fn=recognize_handwritten_text,
    inputs=gr.Image(label="Upload Image"),
    outputs=[gr.Textbox(label='English Text'),
             gr.HTML(label='Arabic Text')],
    title="Handwritten Text Extraction | استخراج النص المكتوب بخط اليد وترجمته",
    description="Upload an image, then press Submit to extract its text and translate it to Arabic | قم برفع الصورة وأرسلها ليظهر لك النص من الصورة",
    examples=[["tx_image_1.png"], ["tx_image_3.png"]]
)

# Combine all interfaces into a tabbed interface
demo = gr.TabbedInterface([img_cap_en_ar, text_recognition, handwritten_rec],
                          ["Extract_Caption", "Extract_Digital_text", "Extract_HandWritten_text"])
demo.launch(debug=True, share=True)
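
# debug=True blocks and surfaces errors in the console; share=True requests a
# temporary public link (hosted platforms may ignore it and print a warning).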