Spaces:

Kawthar12h
/

Image_Captioning_Text_Recognition

Sleeping

App Files Files Community

Kawthar12h committed on Oct 1, 2024

Commit

7b19271

verified ·

1 Parent(s): 2a7bf63

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -21

app.py CHANGED Viewed

@@ -29,46 +29,35 @@ translate = pipeline("translation",model="marefa-nlp/marefa-mt-en-ar")
 def caption_and_translate(img, min_len, max_len):
     # Generate English caption
-    # It takes image and convert it to the RGB color
-    raw_image = Image.open(img).convert('RGB')
-    #prepares the image data for input to the Blip model
     inputs_blip = processor_blip(raw_image, return_tensors="pt")
-    #generates an English caption for the image
-    out_blip = model_blip.generate(**inputs_blip, min_length=min_len, max_length=max_len)
     english_caption = processor_blip.decode(out_blip[0], skip_special_tokens=True)
     # Translate caption from English to Arabic
     arabic_caption = translate(english_caption)
     arabic_caption = arabic_caption[0]['translation_text']
-    # The Arabic caption is formatted with right-to-left directionality.
     translated_caption = f'<div dir="rtl">{arabic_caption}</div>'
-    # Return both caption and translated caption
     return english_caption, translated_caption
 # Gradio interface with multiple outputs
 img_cap_en_ar = gr.Interface(
-    fn=caption_and_translate, # The function that processes the image
-    #type='filepath'
-    #Users can upload an image and adjust the minimum and maximum caption lengths
-    inputs=[gr.Image(type='pil', label='Image'),
-            gr.Slider(label='Minimum Length', minimum=1, maximum=500, value=30),
-            gr.Slider(label='Maximum Length', minimum=1, maximum=500, value=100)],
     outputs=[gr.Textbox(label='English Caption'),
              gr.HTML(label='Arabic Caption')],
     title='Image Captioning | وصف الصورة',
     description="Upload an image to generate an English & Arabic caption | قم برفع صورة وأرسلها ليظهر لك وصف للصورة",
-    examples =[["image_0.png"], ["image_2.png"]]
 )
 # Load the model
 text_rec = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")
@@ -126,7 +115,7 @@ def recognize_handwritten_text(image2):
 # Gradio interface with image upload input and text output
 handwritten_rec = gr.Interface(
     fn=recognize_handwritten_text,
-    inputs=gr.Image(type="pil"),
     outputs=[gr.Textbox(label='English Text'),
              gr.HTML(label='Arabic Text')],
     title="Handwritten Text Extraction | | إستخراج النص المكتوب بخط اليد وترجمتة",

 def caption_and_translate(img, min_len=70, max_len=1000):
     """Caption an image in English and translate the caption to Arabic.

     Args:
         img: Path of the image file to caption; it is opened with
             ``Image.open`` and converted to RGB.
         min_len: Minimum length forwarded to ``model_blip.generate``.
             Defaults to 70, the value that used to be hard-coded.
         max_len: Maximum length forwarded to ``model_blip.generate``.
             Defaults to 1000, the value that used to be hard-coded.

     Returns:
         A ``(english_caption, translated_caption)`` tuple. The second
         item is the Arabic translation wrapped in a ``<div dir="rtl">``
         so it renders right-to-left.
     """
     # The length limits carry defaults because the interface passes only the
     # image; without defaults that call fails with a missing-argument error.
     # Generate English caption
     raw_image = Image.open(img).convert('RGB')
     inputs_blip = processor_blip(raw_image, return_tensors="pt")
     out_blip = model_blip.generate(
         **inputs_blip,
         # int() keeps the limits integral if a caller hands over floats.
         min_length=int(min_len),
         max_length=int(max_len),
     )
     english_caption = processor_blip.decode(out_blip[0], skip_special_tokens=True)
     # Translate caption from English to Arabic
     arabic_caption = translate(english_caption)
     arabic_caption = arabic_caption[0]['translation_text']
     translated_caption = f'<div dir="rtl">{arabic_caption}</div>'
     # Return both captions
     return english_caption, translated_caption
 # Captioning interface: one image in, English text plus Arabic HTML out.
 # Only the image is exposed as an input; the length sliders are disabled.
 img_cap_en_ar = gr.Interface(
     fn=caption_and_translate,
     inputs=[
         gr.Image(type='filepath', label='Image'),
     ],
     outputs=[
         gr.Textbox(label='English Caption'),
         gr.HTML(label='Arabic Caption'),
     ],
     title='Image Captioning | وصف الصورة',
     description="Upload an image to generate an English & Arabic caption | قم برفع صورة وأرسلها ليظهر لك وصف للصورة",
     examples=[["image_2.png"]],
 )
 # Image-to-text pipeline loaded from the jinhybr/OCR-Donut-CORD checkpoint.
 text_rec = pipeline(
     "image-to-text",
     model="jinhybr/OCR-Donut-CORD",
 )
 # Gradio interface with image upload input and text output
 handwritten_rec = gr.Interface(
     fn=recognize_handwritten_text,
+    inputs=gr.Image(label="Upload Image"),
     outputs=[gr.Textbox(label='English Text'),
              gr.HTML(label='Arabic Text')],
     title="Handwritten Text Extraction | | إستخراج النص المكتوب بخط اليد وترجمتة",