Kawthar12h committed on
Commit
43c7b9d
1 Parent(s): 67779ca

Create app.py

Files changed (1): app.py +130 -0
app.py ADDED
# Import needed libraries
from PIL import Image
import gradio as gr
import torch
import requests
import re
from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration, TrOCRProcessor, VisionEncoderDecoderModel

# Download example images for captioning
img_urls_1 = ['https://i.pinimg.com/564x/f7/f5/bd/f7f5bd929e05a852ff423e6e02deea54.jpg',
              'https://i.pinimg.com/564x/b4/29/69/b4296962cb76a72354a718109835caa3.jpg',
              'https://i.pinimg.com/564x/f2/68/8e/f2688eccd6dd60fdad89ef78950b9ead.jpg']
for idx1, url1 in enumerate(img_urls_1):
    image = Image.open(requests.get(url1, stream=True).raw)
    image.save(f"image_{idx1}.png")

# Download example images for text recognition
img_urls_2 = ['https://i.pinimg.com/564x/14/b0/07/14b0075ccd5ea35f7deffc9e5bd6de30.jpg',
              'https://newsimg.bbc.co.uk/media/images/45510000/jpg/_45510184_the_writings_466_180.jpg',
              'https://cdn.shopify.com/s/files/1/0047/1524/9737/files/Cetaphil_Face_Wash_Ingredients_Optimized.png?v=1680923920',
              'https://github.com/kawther12h/Image_Captioning-and-Text_Recognition/blob/main/handText22.jpg?raw=true',
              'https://github.com/kawther12h/Image_Captioning-and-Text_Recognition/blob/main/handText11.jpg?raw=true']
for idx2, url2 in enumerate(img_urls_2):
    image = Image.open(requests.get(url2, stream=True).raw)
    image.save(f"tx_image_{idx2}.png")

# Load BLIP model and processor for captioning
processor_blip = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model_blip = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

# Load the Marefa MarianMT model for translation (English to Arabic); reused by all three tabs
translate = pipeline("translation", model="marefa-nlp/marefa-mt-en-ar")

def caption_and_translate(img, min_len, max_len):
    # Generate an English caption with BLIP
    raw_image = Image.open(img).convert('RGB')
    inputs_blip = processor_blip(raw_image, return_tensors="pt")
    # Cast slider values to int: sliders may return floats, generate expects ints
    out_blip = model_blip.generate(**inputs_blip, min_length=int(min_len), max_length=int(max_len))
    english_caption = processor_blip.decode(out_blip[0], skip_special_tokens=True)

    # Translate the caption from English to Arabic and wrap it for right-to-left display
    arabic_caption = translate(english_caption)[0]['translation_text']
    translated_caption = f'<div dir="rtl">{arabic_caption}</div>'

    # Return both captions
    return english_caption, translated_caption


# Gradio interface with multiple outputs
img_cap_en_ar = gr.Interface(
    fn=caption_and_translate,
    inputs=[gr.Image(type='filepath', label='Image'),
            gr.Slider(label='Minimum Length', minimum=1, maximum=500, value=30),
            gr.Slider(label='Maximum Length', minimum=1, maximum=500, value=100)],
    outputs=[gr.Textbox(label='English Caption'),
             gr.HTML(label='Arabic Caption')],
    title='Image Captioning | وصف الصورة',
    description='Upload an image to generate English and Arabic captions | قم برفع صورة وأرسلها ليظهر لك وصف للصورة',
    examples=[["image_0.png"], ["image_1.png"], ["image_2.png"]]
)

# Load the Donut model for printed-text recognition
text_rec = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")

# Function to process the image and extract its text
def extract_text(image):
    # Pass the image to the pipeline
    result = text_rec(image)

    # Keep the plain text and strip the model's XML-like tags
    # (replace with a space so adjacent fields do not run together)
    text = result[0]['generated_text']
    text = re.sub(r'<[^>]*>', ' ', text).strip()

    # Translate the extracted text from English to Arabic and wrap it for right-to-left display
    arabic_text = translate(text)[0]['translation_text']
    htranslated_text = f'<div dir="rtl">{arabic_text}</div>'

    # Return the extracted and translated text
    return text, htranslated_text

# Define the Gradio interface
text_recognition = gr.Interface(
    fn=extract_text,              # The function that processes the image
    inputs=gr.Image(type="pil"),  # Input is an image (PIL format)
    outputs=[gr.Textbox(label='Extracted text'),
             gr.HTML(label='Translation of the extracted text')],
    title="Text Extraction and Translation | استخراج النص وترجمته",
    description="Upload an image, then press Submit to extract its text and translate it to Arabic | قم برفع الصورة وأرسلها ليظهر لك النص من الصورة",
    examples=[["tx_image_0.png"], ["tx_image_2.png"]],
)
# Load the TrOCR model and processor for handwritten text extraction
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')

def recognize_handwritten_text(image2):
    # Preprocess the image and generate text with TrOCR
    pixel_values = processor(images=image2, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Translate the extracted text from English to Arabic and wrap it for right-to-left display
    arabic_text = translate(generated_text)[0]['translation_text']
    htranslated_text = f'<div dir="rtl">{arabic_text}</div>'

    # Return the extracted and translated text
    return generated_text, htranslated_text

# Gradio interface with image upload input and text output
handwritten_rec = gr.Interface(
    fn=recognize_handwritten_text,
    inputs=gr.Image(type="pil", label="Upload Image"),  # PIL format, as the processor expects
    outputs=[gr.Textbox(label='English Text'),
             gr.HTML(label='Arabic Text')],
    title="Handwritten Text Extraction | استخراج النص المكتوب بخط اليد وترجمته",
    description="Upload an image, then press Submit to extract the handwritten text and translate it to Arabic | قم برفع الصورة وأرسلها ليظهر لك النص من الصورة",
    examples=[["tx_image_1.png"], ["tx_image_3.png"]]
)

# Combine all interfaces into a tabbed interface
demo = gr.TabbedInterface([img_cap_en_ar, text_recognition, handwritten_rec],
                          ["Extract_Caption", "Extract_Digital_text", "Extract_HandWritten_text"])
demo.launch(debug=True, share=True)