Rehan3024 committed
Commit a8987f9 · verified · 1 Parent(s): cdfbcaa
Files changed (1)
  1. note.py +106 -0
note.py ADDED
@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
"""note

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1QURIEiSXUEJNZgmvvRYhRiCHT2gYx97F
"""

from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration, BlipForQuestionAnswering, MarianMTModel, MarianTokenizer
import gradio as gr
import torch
import warnings

warnings.filterwarnings("ignore")

# Load the BLIP captioning and VQA models
captioning_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
captioning_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

vqa_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
vqa_model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
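
# Optional sketch (not in the original script): run inference on a GPU when one
# is available. The functions below would then also need to move their input
# tensors to the same device, e.g. inputs = inputs.to(device).
#     device = "cuda" if torch.cuda.is_available() else "cpu"
#     captioning_model.to(device)
#     vqa_model.to(device)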

# MarianMT checkpoint names for each supported target language
translation_models = {
    "Spanish": 'Helsinki-NLP/opus-mt-en-es',
    "German": 'Helsinki-NLP/opus-mt-en-de',
    "Chinese": 'Helsinki-NLP/opus-mt-en-zh',
    "Japanese": 'Helsinki-NLP/opus-mt-en-ja',
    "Russian": 'Helsinki-NLP/opus-mt-en-ru',
    "Arabic": 'Helsinki-NLP/opus-mt-en-ar',
    "Hindi": 'Helsinki-NLP/opus-mt-en-hi',
    "Urdu": 'Helsinki-NLP/opus-mt-en-ur'
}
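
# Note: if any checkpoint above is unavailable on the Hugging Face Hub, the
# loading loop below skips that language and prints the error instead of crashing.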

# Load translation models and tokenizers
loaded_translation_models = {}
loaded_translation_tokenizers = {}

for lang, model_name in translation_models.items():
    try:
        loaded_translation_models[lang] = MarianMTModel.from_pretrained(model_name)
        loaded_translation_tokenizers[lang] = MarianTokenizer.from_pretrained(model_name)
    except Exception as e:
        print(f"Error loading model for {lang}: {e}")

# Captioning function: BLIP generates a short English description of the image
def caption(image):
    image = image.convert("RGB")
    inputs = captioning_processor(image, return_tensors="pt")
    out = captioning_model.generate(**inputs)
    return captioning_processor.decode(out[0], skip_special_tokens=True)
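
# Example usage outside Gradio (a sketch; "photo.jpg" is a hypothetical file):
#     img = Image.open("photo.jpg")
#     print(caption(img))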

# Visual Question Answering function: BLIP answers a free-form question about the image
def qna(image, question):
    image = image.convert("RGB")
    inputs = vqa_processor(image, question, return_tensors="pt")
    out = vqa_model.generate(**inputs)
    return vqa_processor.decode(out[0], skip_special_tokens=True)
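
# Example usage (a sketch with the same hypothetical file):
#     print(qna(Image.open("photo.jpg"), "How many people are in the picture?"))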

# Translation function: MarianMT translates English text into the target language
def translate_text(text, target_lang="Spanish"):
    model = loaded_translation_models.get(target_lang)
    tokenizer = loaded_translation_tokenizers.get(target_lang)
    if model is None or tokenizer is None:
        return f"Translation model for {target_lang} is not available."
    inputs = tokenizer(text, return_tensors="pt")
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
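
# Example usage (a sketch):
#     print(translate_text("A dog is playing in the park.", "German"))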

# Combined function: caption the image in English, then translate the caption
def caption_and_translate(image, target_lang="Spanish"):
    caption_text = caption(image)
    translated_caption = translate_text(caption_text, target_lang)
    return caption_text, translated_caption

# Create Gradio interfaces
interface1 = gr.Interface(fn=caption,
                          inputs=gr.components.Image(type="pil"),
                          outputs=gr.components.Textbox(label="Generated Caption by BLIP"),
                          description="BLIP Image Captioning")

interface2 = gr.Interface(fn=qna,
                          inputs=[gr.components.Image(type="pil"), gr.components.Textbox(label="Question")],
                          outputs=gr.components.Textbox(label="Answer generated by BLIP"),
                          description="BLIP Visual Question Answering of Images")

interface3 = gr.Interface(fn=caption_and_translate,
                          inputs=[gr.components.Image(type="pil"),
                                  gr.components.Dropdown(label="Target Language",
                                                         choices=["Spanish", "German", "Chinese", "Japanese", "Russian", "Arabic", "Hindi", "Urdu"])],
                          outputs=[gr.components.Textbox(label="Generated Caption"),
                                   gr.components.Textbox(label="Translated Caption")],
                          description="Image Captioning and Translation")

title = "Automated Image Captioning and Visual QnA Engine"

final_interface = gr.TabbedInterface([interface1, interface2, interface3],
                                     ["Captioning", "Visual QnA", "Captioning and Translation"],
                                     title=title, theme=gr.themes.Soft())

final_interface.launch(inbrowser=True)
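
# Note (an assumption, not in the original): launch(share=True) would additionally
# create a temporary public link when running locally; inbrowser=True only opens
# the app in a local browser tab.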

# Dependencies (install before running): pip install transformers gradio torch Pillow sacremoses